diff options
Diffstat (limited to 'doc/legacy')
42 files changed, 12149 insertions, 0 deletions
diff --git a/doc/legacy/Makefile.am b/doc/legacy/Makefile.am new file mode 100644 index 00000000000..b2caabaa2f3 --- /dev/null +++ b/doc/legacy/Makefile.am @@ -0,0 +1,3 @@ +info_TEXINFOS = user-guide.texi +CLEANFILES = *~ +DISTCLEANFILES = .deps/*.P *.info *vti diff --git a/doc/legacy/advanced-stripe.odg b/doc/legacy/advanced-stripe.odg Binary files differnew file mode 100644 index 00000000000..7686d7091b2 --- /dev/null +++ b/doc/legacy/advanced-stripe.odg diff --git a/doc/legacy/advanced-stripe.pdf b/doc/legacy/advanced-stripe.pdf Binary files differnew file mode 100644 index 00000000000..ec8b03dcfbb --- /dev/null +++ b/doc/legacy/advanced-stripe.pdf diff --git a/doc/legacy/colonO-icon.jpg b/doc/legacy/colonO-icon.jpg Binary files differnew file mode 100644 index 00000000000..3e66f7a2775 --- /dev/null +++ b/doc/legacy/colonO-icon.jpg diff --git a/doc/legacy/docbook/Administration_Guide.ent b/doc/legacy/docbook/Administration_Guide.ent new file mode 100644 index 00000000000..3381b2bfec1 --- /dev/null +++ b/doc/legacy/docbook/Administration_Guide.ent @@ -0,0 +1,4 @@ +<!ENTITY PRODUCT "Documentation"> +<!ENTITY BOOKID "Administration_Guide"> +<!ENTITY YEAR "2012"> +<!ENTITY HOLDER "Red Hat Inc"> diff --git a/doc/legacy/docbook/Administration_Guide.xml b/doc/legacy/docbook/Administration_Guide.xml new file mode 100644 index 00000000000..483855b1a02 --- /dev/null +++ b/doc/legacy/docbook/Administration_Guide.xml @@ -0,0 +1,27 @@ +<?xml version='1.0' encoding='utf-8' ?> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ +<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent"> +%BOOK_ENTITIES; +]> +<book> + <xi:include href="Book_Info.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="Preface.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="gfs_introduction.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="admin_start_stop_daemon.xml" 
xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="admin_console.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="admin_storage_pools.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="admin_setting_volumes.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="admin_settingup_clients.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="admin_managing_volumes.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="admin_geo-replication.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="admin_directory_Quota.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="admin_monitoring_workload.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="admin_ACLs.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="admin_UFO.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="admin_Hadoop.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="admin_troubleshooting.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="admin_commandref.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="glossary.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="Revision_History.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> +</book> + diff --git a/doc/legacy/docbook/Author_Group.xml b/doc/legacy/docbook/Author_Group.xml new file mode 100644 index 00000000000..f3fa3174037 --- /dev/null +++ b/doc/legacy/docbook/Author_Group.xml @@ -0,0 +1,17 @@ +<?xml version='1.0' encoding='utf-8' ?> +<!DOCTYPE authorgroup PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ +<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent"> +%BOOK_ENTITIES; +]> +<authorgroup> + <author> + <firstname>Divya</firstname> + <surname>Muntimadugu</surname> + <affiliation> + <orgname>Red Hat</orgname> + 
<orgdiv>Engineering Content Services</orgdiv> + </affiliation> + <email>divya@redhat.com</email> + </author> +</authorgroup> + diff --git a/doc/legacy/docbook/Book_Info.xml b/doc/legacy/docbook/Book_Info.xml new file mode 100644 index 00000000000..6be6a7816ca --- /dev/null +++ b/doc/legacy/docbook/Book_Info.xml @@ -0,0 +1,28 @@ +<?xml version='1.0' encoding='utf-8' ?> +<!DOCTYPE bookinfo PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ +<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent"> +%BOOK_ENTITIES; +]> +<bookinfo id="book-Administration_Guide-Administration_Guide"> + <title>Administration Guide</title> + <subtitle>Using Gluster File System <remark> Beta 3</remark> </subtitle> + <productname>Gluster File System</productname> + <productnumber>3.3</productnumber> + <edition>1</edition> + <pubsnumber>1</pubsnumber> + <abstract> + <para> + This guide describes Gluster File System (GlusterFS) and provides information on how to configure, operate, and manage GlusterFS. 
+ </para> + </abstract> + <corpauthor> + <inlinemediaobject> + <imageobject> + <imagedata fileref="Common_Content/images/title_logo.svg" format="SVG" /> + </imageobject> + </inlinemediaobject> + </corpauthor> + <xi:include href="Common_Content/Legal_Notice.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> + <xi:include href="Author_Group.xml" xmlns:xi="http://www.w3.org/2001/XInclude" /> +</bookinfo> + diff --git a/doc/legacy/docbook/Chapter.xml b/doc/legacy/docbook/Chapter.xml new file mode 100644 index 00000000000..4a1cef872c8 --- /dev/null +++ b/doc/legacy/docbook/Chapter.xml @@ -0,0 +1,33 @@ +<?xml version='1.0' encoding='utf-8' ?> +<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ +<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent"> +%BOOK_ENTITIES; +]> +<chapter id="chap-Administration_Guide-Test_Chapter"> + <title>Test Chapter</title> + <para> + This is a test paragraph + </para> + <section id="sect-Administration_Guide-Test_Chapter-Test_Section_1"> + <title>Test Section 1</title> + <para> + This is a test paragraph in a section + </para> + </section> + + <section id="sect-Administration_Guide-Test_Chapter-Test_Section_2"> + <title>Test Section 2</title> + <para> + This is a test paragraph in Section 2 + <orderedlist> + <listitem> + <para> + listitem text + </para> + </listitem> + </orderedlist> + </para> + </section> + +</chapter> + diff --git a/doc/legacy/docbook/Preface.xml b/doc/legacy/docbook/Preface.xml new file mode 100644 index 00000000000..320311906d0 --- /dev/null +++ b/doc/legacy/docbook/Preface.xml @@ -0,0 +1,24 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. 
--> +<!DOCTYPE preface PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ +<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent"> +%BOOK_ENTITIES; +]> +<preface id="pref-Administration_Guide-Preface"> + <title>Preface</title> + <para>This guide describes how to configure, operate, and manage Gluster File System (GlusterFS).</para> + <section> + <title>Audience</title> + <para>This guide is intended for Systems Administrators interested in configuring and managing GlusterFS.</para> + <para>This guide assumes that you are familiar with the Linux operating system, concepts of File System, GlusterFS concepts, and GlusterFS Installation</para> + </section> + <section> + <title>License</title> + <para>The License information is available at <ulink url="http://www.redhat.com/licenses/rhel_rha_eula.html"/>.</para> + </section> + <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Common_Content/Conventions.xml"/> + <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Feedback.xml"> + <xi:fallback xmlns:xi="http://www.w3.org/2001/XInclude"> <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Common_Content/Feedback.xml"/> + </xi:fallback> + </xi:include> +</preface> diff --git a/doc/legacy/docbook/Revision_History.xml b/doc/legacy/docbook/Revision_History.xml new file mode 100644 index 00000000000..09320821fb0 --- /dev/null +++ b/doc/legacy/docbook/Revision_History.xml @@ -0,0 +1,27 @@ +<?xml version='1.0' encoding='utf-8' ?> +<!DOCTYPE appendix PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ +<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent"> +%BOOK_ENTITIES; +]> +<appendix id="appe-Administration_Guide-Revision_History"> + <title>Revision History</title> + <simpara> + <revhistory> + <revision> + <revnumber>1-0</revnumber> + <date>Thu Apr 5 2012</date> + <author> + <firstname>Divya</firstname> + <surname>Muntimadugu</surname> + 
<email>divya@redhat.com</email> + </author> + <revdescription> + <simplelist> + <member>Draft </member> + </simplelist> + </revdescription> + </revision> + </revhistory> + </simpara> +</appendix> + diff --git a/doc/legacy/docbook/admin_ACLs.xml b/doc/legacy/docbook/admin_ACLs.xml new file mode 100644 index 00000000000..156e52c17f2 --- /dev/null +++ b/doc/legacy/docbook/admin_ACLs.xml @@ -0,0 +1,206 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []> +<chapter id="chap-Administration_Guide-ACLs"> + <title>POSIX Access Control Lists </title> + <para>POSIX Access Control Lists (ACLs) allows you to assign different permissions for different users or +groups even though they do not correspond to the original owner or the owning group. + </para> + <para>For example: User john creates a file but does not want to allow anyone to do anything with this +file, except another user, antony (even though there are other users that belong to the group john). +</para> + <para>This means, in addition to the file owner, the file group, and others, additional users and groups can +be granted or denied access by using POSIX ACLs. +</para> + <section id="sect-Administration_Guide-ACLs-Activating_ACLs"> + <title>Activating POSIX ACLs Support </title> + <para>To use POSIX ACLs for a file or directory, the partition of the file or directory must be mounted with +POSIX ACLs support. 
+</para> + <section id="sect-Administration_Guide-ACLs-Activating_ACLs-Server"> + <title>Activating POSIX ACLs Support on Server </title> + <para>To mount the backend export directories for POSIX ACLs support, use the following command: +</para> + <para><command># mount -o acl <replaceable>device-name</replaceable> <replaceable>partition</replaceable></command> +</para> + <para>For example: +</para> + <para><command># mount -o acl /dev/sda1 /export1 </command></para> + <para>Alternatively, if the partition is listed in the /etc/fstab file, add the following entry for the partition +to include the POSIX ACLs option: +</para> + <para><command>LABEL=/work /export1 ext3 rw,acl 14 </command></para> + </section> + <section> + <title>Activating POSIX ACLs Support on Client </title> + <para>To mount the glusterfs volumes for POSIX ACLs support, use the following command: +</para> + <para><command># mount -t glusterfs -o acl <replaceable>servername:volume-id</replaceable> <replaceable>mount point</replaceable></command> +</para> + <para>For example: +</para> + <para><command># mount -t glusterfs -o acl 198.192.198.234:glustervolume /mnt/gluster</command> +</para> + </section> + </section> + <section> + <title>Setting POSIX ACLs </title> + <para>You can set two types of POSIX ACLs, that is, access ACLs and default ACLs. You can use +access ACLs to grant permission for a specific file or directory. You can use default ACLs only +on a directory but if a file inside that directory does not have an ACL, it inherits the permissions of +the default ACLs of the directory. +</para> + <para>You can set ACLs per user, per group, for users not in the user group for the file, and via the +effective rights mask. +</para> + <section> + <title>Setting Access ACLs </title> + <para>You can apply access ACLs to grant permission for both files and directories. 
+</para> + <para><emphasis role="bold">To set or modify Access ACLs</emphasis> +</para> + <para>You can set or modify access ACLs using the following command: +</para> + <para><command># setfacl -m <replaceable>entry type</replaceable> file </command></para> + <para>The ACL entry types are the POSIX ACLs representations of owner, group, and other. +</para> + <para>Permissions must be a combination of the characters <command>r</command> (read), <command>w</command> (write), and <command>x</command> (execute). You must +specify the ACL entry in the following format and can specify multiple entry types separated by +commas. +</para> + <informaltable frame="all"> + <tgroup cols="2"> + <colspec colname="c1"/> + <colspec colname="c2"/> + <thead> + <row> + <entry>ACL Entry</entry> + <entry>Description</entry> + </row> + </thead> + <tbody> + <row> + <entry>u:uid:&lt;permission&gt; </entry> + <entry>Sets the access ACLs for a user. You can specify user name or UID. </entry> + </row> + <row> + <entry>g:gid:&lt;permission&gt; </entry> + <entry>Sets the access ACLs for a group. You can specify group name or GID. </entry> + </row> + <row> + <entry>m:&lt;permission&gt; </entry> + <entry>Sets the effective rights mask. The mask is the combination of all access permissions of the owning group and all of the user and group entries. </entry> + </row> + <row> + <entry>o:&lt;permission&gt; </entry> + <entry>Sets the access ACLs for users other than the ones in the group for the file. </entry> + </row> + </tbody> + </tgroup> + </informaltable> + <para>If a file or directory already has POSIX ACLs, and the setfacl command is used, the additional +permissions are added to the existing POSIX ACLs or the existing rule is modified. 
+</para> + <para>For example, to give read and write permissions to user antony: +</para> + <para><command># setfacl -m u:antony:rw /mnt/gluster/data/testfile </command></para> + </section> + <section> + <title>Setting Default ACLs </title> + <para>You can apply default ACLs only to directories. They determine the permissions that a file system +object inherits from its parent directory when it is created. +</para> + <para>To set default ACLs +</para> + <para>You can set default ACLs for directories using the following command: +</para> + <para><command># setfacl -m --set <replaceable>entry type directory</replaceable></command> +</para> + <para>For example, to set the default ACLs for the /data directory to read for users not in the user group: +</para> + <para><command># setfacl -m --set o::r /mnt/gluster/data </command></para> + <para><note> + <para>An access ACL set for an individual file can override the default ACLs permissions. +</para> + </note></para> + <para><emphasis role="bold">Effects of Default ACLs </emphasis></para> + <para>The following are the ways in which the permissions of a directory's default ACLs are passed to the +files and subdirectories in it: +</para> + <itemizedlist> + <listitem> + <para>A subdirectory inherits the default ACLs of the parent directory both as its default ACLs and as its +access ACLs. +</para> + </listitem> + <listitem> + <para>A file inherits the default ACLs as its access ACLs. +</para> + </listitem> + </itemizedlist> + </section> + </section> + <section> + <title>Retrieving POSIX ACLs </title> + <para>You can view the existing POSIX ACLs for a file or directory. 
+</para> + <para><emphasis role="bold">To view existing POSIX ACLs </emphasis></para> + <itemizedlist> + <listitem> + <para>View the existing access ACLs of a file using the following command: +</para> + <para><command># getfacl <replaceable>path/filename</replaceable></command> +</para> + <para>For example, to view the existing POSIX ACLs for sample.jpg +</para> + <programlisting># getfacl /mnt/gluster/data/test/sample.jpg +# owner: antony +# group: antony +user::rw- +group::rw- +other::r--</programlisting> + </listitem> + <listitem> + <para>View the default ACLs of a directory using the following command: +</para> + <para><command># getfacl <replaceable>directory name</replaceable></command></para> + <para>For example, to view the existing ACLs for /data/doc +</para> + <programlisting># getfacl /mnt/gluster/data/doc +# owner: antony +# group: antony +user::rw- +user:john:r-- +group::r-- +mask::r-- +other::r-- +default:user::rwx +default:user:antony:rwx +default:group::r-x +default:mask::rwx +default:other::r-x</programlisting> + </listitem> + </itemizedlist> + </section> + <section> + <title>Removing POSIX ACLs </title> + <para>To remove all the permissions for a user, groups, or others, use the following command: +</para> + <para><command># setfacl -x <replaceable>ACL entry type file</replaceable></command></para> + <para>For example, to remove all permissions from the user antony: +</para> + <para><command># setfacl -x u:antony /mnt/gluster/data/test-file</command></para> + </section> + <section> + <title>Samba and ACLs </title> + <para>If you are using Samba to access GlusterFS FUSE mount, then POSIX ACLs are enabled by default. +Samba has been compiled with the <command>--with-acl-support</command> option, so no special flags are required +when accessing or mounting a Samba share. +</para> + </section> + <section> + <title>NFS and ACLs </title> + <para>Currently we do not support ACLs configuration through NFS, i.e. setfacl and getfacl commands do +not work. 
However, ACLs permissions set using Gluster Native Client applies on NFS mounts. +</para> + </section> +</chapter> diff --git a/doc/legacy/docbook/admin_Hadoop.xml b/doc/legacy/docbook/admin_Hadoop.xml new file mode 100644 index 00000000000..08bac89615b --- /dev/null +++ b/doc/legacy/docbook/admin_Hadoop.xml @@ -0,0 +1,244 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ +<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent"> +%BOOK_ENTITIES; +]> +<chapter id="chap-Administration_Guide-Hadoop"> + <title>Managing Hadoop Compatible Storage </title> + <para>GlusterFS provides compatibility for Apache Hadoop and it uses the standard file system +APIs available in Hadoop to provide a new storage option for Hadoop deployments. Existing +MapReduce based applications can use GlusterFS seamlessly. This new functionality opens up data +within Hadoop deployments to any file-based or object-based application. + + </para> + <section id="sect-Administration_Guide-Hadoop-Introduction-Architecture_Overview"> + <title>Architecture Overview </title> + <para>The following diagram illustrates Hadoop integration with GlusterFS: +<mediaobject> + <imageobject> + <imagedata fileref="images/Hadoop_Architecture.png"/> + </imageobject> + </mediaobject> + </para> + </section> + <section id="sect-Administration_Guide-Hadoop-Introduction-Advantages"> + <title>Advantages </title> + <para> +The following are the advantages of Hadoop Compatible Storage with GlusterFS: + + + </para> + <itemizedlist> + <listitem> + <para>Provides simultaneous file-based and object-based access within Hadoop. +</para> + </listitem> + <listitem> + <para>Eliminates the centralized metadata server. +</para> + </listitem> + <listitem> + <para>Provides compatibility with MapReduce applications and rewrite is not required. 
+</para> + </listitem> + <listitem> + <para>Provides a fault tolerant file system. +</para> + </listitem> + </itemizedlist> + </section> + <section> + <title>Preparing to Install Hadoop Compatible Storage</title> + <para>This section provides information on pre-requisites and list of dependencies that will be installed +during installation of Hadoop compatible storage. + +</para> + <section id="sect-Administration_Guide-Hadoop-Preparation"> + <title>Pre-requisites </title> + <para>The following are the pre-requisites to install Hadoop Compatible +Storage : + + </para> + <itemizedlist> + <listitem> + <para>Hadoop 0.20.2 is installed, configured, and is running on all the machines in the cluster. +</para> + </listitem> + <listitem> + <para>Java Runtime Environment +</para> + </listitem> + <listitem> + <para>Maven (mandatory only if you are building the plugin from the source) +</para> + </listitem> + <listitem> + <para>JDK (mandatory only if you are building the plugin from the source) +</para> + </listitem> + <listitem> + <para>getfattr +- command line utility</para> + </listitem> + </itemizedlist> + </section> + </section> + <section> + <title>Installing, and Configuring Hadoop Compatible Storage</title> + <para>This section describes how to install and configure Hadoop Compatible Storage in your storage +environment and verify that it is functioning correctly. + +</para> + <orderedlist> + <para>To install and configure Hadoop compatible storage:</para> + <listitem> + <para>Download <filename>glusterfs-hadoop-0.20.2-0.1.x86_64.rpm</filename> file to each server on your cluster. You can download the file from <ulink url="http://download.gluster.com/pub/gluster/glusterfs/qa-releases/3.3-beta-2/glusterfs-hadoop-0.20.2-0.1.x86_64.rpm"/>. 
+ +</para> + </listitem> + <listitem> + <para>To install Hadoop Compatible Storage on all servers in your cluster, run the following command: +</para> + <para><command># rpm -ivh --nodeps glusterfs-hadoop-0.20.2-0.1.x86_64.rpm</command> +</para> + <para>The following files will be extracted: + </para> + <itemizedlist> + <listitem> + <para>/usr/local/lib/glusterfs-<replaceable>Hadoop-version-gluster_plugin_version</replaceable>.jar </para> + </listitem> + <listitem> + <para> /usr/local/lib/conf/core-site.xml</para> + </listitem> + </itemizedlist> + </listitem> + <listitem> + <para>(Optional) To install Hadoop Compatible Storage in a different location, run the following +command: +</para> + <para><command># rpm -ivh --nodeps --prefix /usr/local/glusterfs/hadoop glusterfs-hadoop-0.20.2-0.1.x86_64.rpm</command> +</para> + </listitem> + <listitem> + <para>Edit the <filename>conf/core-site.xml</filename> file. The following is the sample <filename>conf/core-site.xml</filename> file: +</para> + <para><programlisting><configuration> + <property> + <name>fs.glusterfs.impl</name> + <value>org.apache.hadoop.fs.glusterfs.GlusterFileSystem</value> +</property> + +<property> + <name>fs.default.name</name> + <value>glusterfs://fedora1:9000</value> +</property> + +<property> + <name>fs.glusterfs.volname</name> + <value>hadoopvol</value> +</property> + +<property> + <name>fs.glusterfs.mount</name> + <value>/mnt/glusterfs</value> +</property> + +<property> + <name>fs.glusterfs.server</name> + <value>fedora2</value> +</property> + +<property> + <name>quick.slave.io</name> + <value>Off</value> +</property> +</configuration> +</programlisting></para> + <para>The following are the configurable fields: +</para> + <para><informaltable frame="none"> + <tgroup cols="3"> + <colspec colnum="1" colname="c0" colsep="0"/> + <colspec colnum="2" colname="c1" colsep="0"/> + <colspec colnum="3" colname="c2" colsep="0"/> + <thead> + <row> + <entry>Property Name </entry> + <entry>Default Value 
</entry> + <entry>Description </entry> + </row> + </thead> + <tbody> + <row> + <entry>fs.default.name </entry> + <entry>glusterfs://fedora1:9000</entry> + <entry>Any hostname in the cluster as the server and any port number. </entry> + </row> + <row> + <entry>fs.glusterfs.volname </entry> + <entry>hadoopvol </entry> + <entry>GlusterFS volume to mount. </entry> + </row> + <row> + <entry>fs.glusterfs.mount </entry> + <entry>/mnt/glusterfs</entry> + <entry>The directory used to fuse mount the volume.</entry> + </row> + <row> + <entry>fs.glusterfs.server </entry> + <entry>fedora2</entry> + <entry>Any hostname or IP address on the cluster except the client/master. </entry> + </row> + <row> + <entry>quick.slave.io </entry> + <entry>Off </entry> + <entry>Performance tunable option. If this option is set to On, the plugin will try to perform I/O directly from the disk file system (like ext3 or ext4) the file resides on. Hence read performance will improve and jobs will run faster. <note> + <para>This option is not tested widely.</para> + </note></entry> + </row> + </tbody> + </tgroup> + </informaltable></para> + </listitem> + <listitem> + <para>Create a soft link in Hadoop’s library and configuration directory for the downloaded files (in +Step 3) using the following commands: +</para> + <para><command># ln -s <replaceable>&lt;target location&gt; &lt;source location&gt;</replaceable></command> +</para> + <para>For example, +</para> + <para><command># ln -s /usr/local/lib/glusterfs-0.20.2-0.1.jar <replaceable>$HADOOP_HOME</replaceable>/lib/glusterfs-0.20.2-0.1.jar</command> +</para> + <para><command># ln -s /usr/local/lib/conf/core-site.xml <replaceable>$HADOOP_HOME</replaceable>/conf/core-site.xml </command></para> + </listitem> + <listitem> + <para> (Optional) You can run the following command on Hadoop master to build the plugin and deploy +it along with core-site.xml file, instead of repeating the above steps: +</para> + <para><command># build-deploy-jar.py -d 
<replaceable>$HADOOP_HOME</replaceable> -c </command></para> + </listitem> + </orderedlist> + </section> + <section> + <title>Starting and Stopping the Hadoop MapReduce Daemon</title> + <para>To start and stop MapReduce daemon</para> + <itemizedlist> + <listitem> + <para>To start MapReduce daemon manually, enter the following command: +</para> + <para><command># <replaceable>$HADOOP_HOME</replaceable>/bin/start-mapred.sh</command> +</para> + </listitem> + <listitem> + <para>To stop MapReduce daemon manually, enter the following command: +</para> + <para><command># <replaceable>$HADOOP_HOME</replaceable>/bin/stop-mapred.sh </command></para> + </listitem> + </itemizedlist> + <para><note> + <para>You must start Hadoop MapReduce daemon on all servers. +</para> + </note></para> + </section> +</chapter> diff --git a/doc/legacy/docbook/admin_UFO.xml b/doc/legacy/docbook/admin_UFO.xml new file mode 100644 index 00000000000..03be14dc9dc --- /dev/null +++ b/doc/legacy/docbook/admin_UFO.xml @@ -0,0 +1,1588 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ +<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent"> +%BOOK_ENTITIES; +]> +<chapter id="chap-Administration_Guide-UFO"> + <title>Managing Unified File and Object Storage</title> + <para>Unified File and Object Storage (UFO) unifies NAS and object storage technology. It +provides a system for data storage that enables users to access the same data, both as an object and as a +file, thus simplifying management and controlling storage costs. + +</para> + <para>Unified File and Object Storage is built upon Openstack's Object Storage Swift. 
OpenStack Object Storage allows users to store and retrieve files and content through a simple Web Service (REST: Representational State Transfer) interface as objects, and GlusterFS allows users to store and retrieve files using native FUSE and NFS mounts. It uses GlusterFS as a backend file system for OpenStack Swift. It also leverages OpenStack Swift's web interface for storing and retrieving files over the web, combined with GlusterFS features such as scalability, high availability, replication, and elastic volume management for data management at disk level.</para> + <para>Unified File and Object Storage technology enables enterprises to adopt and deploy +cloud storage solutions. It allows users to access and modify data as objects from a +REST interface along with the ability to access and modify files from NAS interfaces including NFS +and CIFS. In addition to decreasing cost and making it faster and easier to access object data, +it also delivers massive scalability, high availability and replication of object storage. +Infrastructure as a Service (IaaS) providers can utilize GlusterFS Unified File and Object Storage technology to enable their own cloud +storage service. Enterprises can use this technology to accelerate the process of preparing file-based +applications for the cloud and simplify new application development for cloud computing +environments. + +</para> + <para>OpenStack Object Storage is a scalable object storage system; it is not a traditional file system. You will not be able to mount this system like traditional SAN or NAS +volumes and perform POSIX compliant operations. 
</para> + <para><figure> + <title>Unified File and Object Storage Architecture</title> + <mediaobject> + <imageobject> + <imagedata fileref="images/UFO_Architecture.png"/> + </imageobject> + </mediaobject> + </figure></para> + <section> + <title>Components of Object Storage</title> + <para>The major components of Object Storage are: + </para> + <para><emphasis role="bold">Proxy Server</emphasis> + +</para> + <para>All REST requests to the UFO are routed through the Proxy Server. + + +</para> + <para><emphasis role="bold">Objects and Containers </emphasis></para> + <para>An object is the basic storage entity and any optional metadata that represents the data +you store. When you upload data, the data is stored as-is (with no compression or encryption). + +</para> + <para>A container is a storage compartment for your data and provides a way for you to organize +your data. Containers can be visualized as directories in a Linux system. Data must be stored in a container and hence objects are created within a container. + +</para> + <para>It implements objects as files and directories under the container. The object name is a '/' separated path and UFO maps it to directories until the last name in the path, which is marked as a file. With this approach, objects can be accessed as files and directories from native GlusterFS (FUSE) or NFS mounts by providing the '/' separated path.</para> + <para><emphasis role="bold">Accounts and Account Servers</emphasis></para> + <para>The OpenStack Object Storage system is designed to be used by many different storage +consumers. Each user is associated with one or more accounts and must identify themselves using an authentication system. While authenticating, users must provide the name of the account for which the authentication is requested. + +</para> + <para>UFO implements accounts as GlusterFS volumes. 
So, when a user is granted read/write permission on an account, it means that that user has access to all the data available on that GlusterFS volume. + + + + +</para> + <para><emphasis role="bold">Authentication and Access Permissions</emphasis> + +</para> + <para>You must authenticate against an authentication service to receive OpenStack Object +Storage connection parameters and an authentication token. The token must be passed +in for all subsequent container or object operations. One authentication service that you +can use as a middleware example is called <literal>tempauth</literal>.</para> + <para>By default, each user has their own storage account and has full access to that +account. Users must authenticate with their credentials as described above, but once +authenticated they can manage containers and objects within that account. If a user wants to access the content from another account, they must have API access key or a session token provided by their authentication system.</para> + </section> + <section> + <title>Advantages of using GlusterFS Unified File and Object Storage</title> + <para>The following are the advantages of using GlusterFS UFO:</para> + <itemizedlist> + <listitem> + <para>No limit on upload and download files sizes as compared to Open Stack Swift which limits the object size to 5GB.</para> + </listitem> + <listitem> + <para>A unified view of data across NAS and Object Storage technologies.</para> + </listitem> + <listitem> + <para>Using GlusterFS's UFO has other advantages like the following: </para> + <para><itemizedlist> + <listitem> + <para>High availability</para> + </listitem> + <listitem> + <para>Scalability</para> + </listitem> + <listitem> + <para>Replication</para> + </listitem> + <listitem> + <para>Elastic Volume management</para> + </listitem> + </itemizedlist></para> + </listitem> + </itemizedlist> + </section> + <section> + <title>Preparing to Deploy Unified File and Object Storage</title> + <para>This section provides 
information on pre-requisites and the list of dependencies that will be installed +during the installation of Unified File and Object Storage. +</para> + <section> + <title>Pre-requisites </title> + <para>GlusterFS's Unified File and Object Storage needs <literal>user_xattr</literal> support from the underlying disk file system. +Use the following command to enable <literal>user_xattr</literal> for GlusterFS bricks backend: +</para> + <para><command># mount -o remount,user_xattr <replaceable>device name</replaceable></command></para> + <para>For example, +</para> + <para><command># mount -o remount,user_xattr /dev/hda1 </command> +</para> + </section> + <section> + <title>Dependencies </title> + <para>The following packages are installed on GlusterFS when you install Unified File and Object +Storage: + +</para> + <itemizedlist> + <listitem> + <para>curl + + + + + + + + + + + + + +</para> + </listitem> + <listitem> + <para>memcached</para> + </listitem> + <listitem> + <para>openssl</para> + </listitem> + <listitem> + <para>xfsprogs</para> + </listitem> + <listitem> + <para>python2.6</para> + </listitem> + <listitem> + <para>pyxattr</para> + </listitem> + <listitem> + <para>python-configobj +</para> + </listitem> + <listitem> + <para>python-setuptools + +</para> + </listitem> + <listitem> + <para>python-simplejson + +</para> + </listitem> + <listitem> + <para>python-webob + +</para> + </listitem> + <listitem> + <para>python-eventlet + +</para> + </listitem> + <listitem> + <para>python-greenlet + +</para> + </listitem> + <listitem> + <para>python-pastedeploy + +</para> + </listitem> + <listitem> + <para>python-netifaces +</para> + </listitem> + </itemizedlist> + </section> + </section> + <section> + <title>Installing and Configuring Unified File and Object Storage</title> + <para>This section provides instructions on how to install and configure Unified File and Object Storage in your storage +environment.</para> + <section id="chap-ation_Guide-Dir_Quota-Enable"> +
<title>Installing Unified File and Object Storage</title> + <para>To install Unified File and Object Storage:</para> + <orderedlist> + <listitem> + <para>Download <filename>rhel_install.sh</filename> install script from <ulink url="http://download.gluster.com/pub/gluster/glusterfs/3.2/UFO/"/> . +</para> + </listitem> + <listitem> + <para>Run + <filename>rhel_install.sh</filename> script using the following command: +</para> + <para><command># sh rhel_install.sh</command></para> + </listitem> + <listitem> + <para>Download <filename>swift-1.4.5-1.noarch.rpm</filename> and <filename>swift-plugin-1.0.-1.el6.noarch.rpm</filename> files from <ulink url="http://download.gluster.com/pub/gluster/glusterfs/3.2/UFO/"/>.</para> + </listitem> + <listitem> + <para>Install <filename>swift-1.4.5-1.noarch.rpm</filename> and <filename>swift-plugin-1.0.-1.el6.noarch.rpm</filename> using the following commands:</para> + <para><command># rpm -ivh swift-1.4.5-1.noarch.rpm</command></para> + <para><command># rpm -ivh swift-plugin-1.0.-1.el6.noarch.rpm</command></para> + <para><note> + <para>You must repeat the above steps on all the machines on which you want to install Unified File and Object Storage. If you install the Unified File and Object Storage on multiple servers, you can use a load balancer like pound, nginx, and so on to distribute the request across the machines.</para> + </note></para> + </listitem> + </orderedlist> + </section> + <section> + <title>Adding Users</title> + <para>The authentication system allows the administrator to grant different levels of access to different users based on the requirement. The following are the types of user permissions: + </para> + <itemizedlist> + <listitem> + <para>admin user + </para> + </listitem> + <listitem> + <para>normal user</para> + </listitem> + </itemizedlist> + <para>Admin user has read and write permissions on the account. By default, a normal user has no read or write permissions. 
A normal user can only authenticate itself to get an Auth-Token. Read or write permissions are provided through ACLs by the admin users.</para> + <para>Add a new user by adding the following entry in the <filename>/etc/swift/proxy-server.conf</filename> file:</para> + <para><command>user_<account-name>_<user-name> = <password> [.admin]</command></para> + <para>For example, </para> + <para><command>user_test_tester = testing .admin</command> +</para> + <note> + <para>During installation, the installation script adds a few sample users to the <filename>proxy-server.conf</filename> file. It is highly recommended that you remove all the default sample user entries from the configuration file. +</para> + </note> + <para>For more information on setting ACLs, see <xref linkend="chap-Administration_Guide-Working_UFO-Setting_ACLs"/>.</para> + </section> + <section> + <title>Configuring Proxy Server</title> + <para>The Proxy Server is responsible for connecting to the rest of the OpenStack Object Storage architecture. For each request, it looks up the location of the account, container, or object in the ring and routes the request accordingly. The public API is also exposed through the proxy server. When objects are streamed to or from an object server, they are streamed directly through the proxy server to or from the user – the proxy server does not spool them. +</para> + <para>The configurable options pertaining to the proxy server are stored in <filename>/etc/swift/proxy-server.conf</filename>.
The following is the sample <filename>proxy-server.conf</filename> file:</para> + <para><programlisting>[app:proxy-server] +use = egg:swift#proxy +allow_account_management=true +account_autocreate=true + +[filter:tempauth] +use = egg:swift#tempauth +user_admin_admin = admin .admin .reseller_admin +user_test_tester = testing .admin +user_test2_tester2 = testing2 .admin +user_test_tester3 = testing3 + +[filter:healthcheck] +use = egg:swift#healthcheck + +[filter:cache] +use = egg:swift#memcache</programlisting></para> + <para>By default, GlusterFS's Unified File and Object Storage is configured to support HTTP protocol and uses temporary authentication to authenticate the HTTP requests.</para> + </section> + <section> + <title>Configuring Authentication System</title> + <para>Proxy server must be configured to authenticate using <literal> + <literal>tempauth</literal> + </literal>. </para> + </section> + <section> + <title>Configuring Proxy Server for HTTPS</title> + <para>By default, the proxy server handles only HTTP requests.
To configure the proxy server to process HTTPS requests, perform the following steps:</para> + <orderedlist> + <listitem> + <para>Create self-signed cert for SSL using the following commands:</para> + <para><programlisting>cd /etc/swift +openssl req -new -x509 -nodes -out cert.crt -keyout cert.key</programlisting></para> + </listitem> + <listitem> + <para>Add the following lines to <filename>/etc/swift/proxy-server.conf </filename>under <replaceable>[DEFAULT]</replaceable></para> + <para><programlisting>bind_port = 443 + cert_file = /etc/swift/cert.crt + key_file = /etc/swift/cert.key</programlisting></para> + </listitem> + <listitem> + <para>Restart the servers using the following commands:</para> + <para><programlisting>swift-init main stop +swift-init main start</programlisting></para> + </listitem> + </orderedlist> + <para>The following are the configurable options: +</para> + <table frame="all"> + <title>proxy-server.conf Default Options in the [DEFAULT] section </title> + <tgroup cols="3"> + <colspec colname="c1"/> + <colspec colname="c2"/> + <colspec colname="c3"/> + <thead> + <row> + <entry>Option </entry> + <entry>Default </entry> + <entry>Description </entry> + </row> + </thead> + <tbody> + <row> + <entry>bind_ip </entry> + <entry>0.0.0.0 </entry> + <entry>IP Address for server to bind</entry> + </row> + <row> + <entry>bind_port </entry> + <entry>80 </entry> + <entry>Port for server to bind </entry> + </row> + <row> + <entry>swift_dir </entry> + <entry>/etc/swift </entry> + <entry>Swift configuration directory </entry> + </row> + <row> + <entry>workers </entry> + <entry>1</entry> + <entry>Number of workers to fork </entry> + </row> + <row> + <entry>user </entry> + <entry>swift </entry> + <entry>swift user</entry> + </row> + <row> + <entry>cert_file </entry> + <entry/> + <entry>Path to the ssl .crt </entry> + </row> + <row> + <entry>key_file </entry> + <entry/> + <entry>Path to the ssl .key </entry> + </row> + </tbody> + </tgroup> + </table> + <table 
frame="all"> + <title>proxy-server.conf Server Options in the [proxy-server] section </title> + <tgroup cols="3"> + <colspec colname="c1"/> + <colspec colname="c2"/> + <colspec colname="c3"/> + <thead> + <row> + <entry>Option </entry> + <entry>Default </entry> + <entry>Description </entry> + </row> + </thead> + <tbody> + <row> + <entry>use </entry> + <entry/> + <entry>paste.deploy entry point for the proxy server. For most cases, this should be <literal>egg:swift#proxy</literal>. </entry> + </row> + <row> + <entry>log_name </entry> + <entry>proxy-server </entry> + <entry>Label used when logging </entry> + </row> + <row> + <entry>log_facility </entry> + <entry>LOG_LOCAL0 </entry> + <entry>Syslog log facility </entry> + </row> + <row> + <entry>log_level </entry> + <entry>INFO </entry> + <entry>Log level </entry> + </row> + <row> + <entry>log_headers </entry> + <entry>True </entry> + <entry>If True, log headers in each request </entry> + </row> + <row> + <entry>recheck_account_existence </entry> + <entry>60 </entry> + <entry>Cache timeout in seconds to send memcached for account existence </entry> + </row> + <row> + <entry>recheck_container_existence </entry> + <entry>60 </entry> + <entry>Cache timeout in seconds to send memcached for container existence </entry> + </row> + <row> + <entry>object_chunk_size </entry> + <entry>65536 </entry> + <entry>Chunk size to read from object servers </entry> + </row> + <row> + <entry>client_chunk_size </entry> + <entry>65536 </entry> + <entry>Chunk size to read from clients </entry> + </row> + <row> + <entry>memcache_servers </entry> + <entry>127.0.0.1:11211 </entry> + <entry>Comma separated list of memcached servers ip:port </entry> + </row> + <row> + <entry>node_timeout </entry> + <entry>10 </entry> + <entry>Request timeout to external services </entry> + </row> + <row> + <entry>client_timeout </entry> + <entry>60 </entry> + <entry>Timeout to read one chunk from a client </entry> + </row> + <row> + <entry>conn_timeout
</entry> + <entry>0.5 </entry> + <entry>Connection timeout to external services </entry> + </row> + <row> + <entry>error_suppression_interval </entry> + <entry>60 </entry> + <entry>Time in seconds that must elapse since the last error for a node to be considered no longer error limited </entry> + </row> + <row> + <entry>error_suppression_limit </entry> + <entry>10 </entry> + <entry>Error count to consider a node error limited </entry> + </row> + <row> + <entry>allow_account_management </entry> + <entry>false </entry> + <entry>Whether account <literal>PUT</literal>s and <literal>DELETE</literal>s are even callable </entry> + </row> + </tbody> + </tgroup> + </table> + </section> + <section> + <title>Configuring Object Server</title> + <para>The Object Server is a very simple blob storage server that can store, retrieve, and delete objects stored on local devices. Objects are stored as binary files on the file system with metadata stored in the file’s extended attributes (xattrs). This requires that the underlying file system choice for object servers support xattrs on files. + +</para> + <para>The configurable options pertaining to the Object Server are stored in the file <filename>/etc/swift/object-server/1.conf</filename>.
The following is the sample <filename>object-server/1.conf</filename> file:</para> + <para><programlisting>[DEFAULT] +devices = /srv/1/node +mount_check = false +bind_port = 6010 +user = root +log_facility = LOG_LOCAL2 + +[pipeline:main] +pipeline = gluster object-server + +[app:object-server] +use = egg:swift#object + +[filter:gluster] +use = egg:swift#gluster + +[object-replicator] +vm_test_mode = yes + +[object-updater] +[object-auditor]</programlisting></para> + <para>The following are the configurable options: +</para> + <table frame="all"> + <title>object-server.conf Default Options in the [DEFAULT] section </title> + <tgroup cols="3"> + <colspec colname="c1"/> + <colspec colname="c2"/> + <colspec colname="c3"/> + <thead> + <row> + <entry>Option </entry> + <entry>Default </entry> + <entry>Description </entry> + </row> + </thead> + <tbody> + <row> + <entry>swift_dir </entry> + <entry>/etc/swift </entry> + <entry>Swift configuration directory </entry> + </row> + <row> + <entry>devices </entry> + <entry>/srv/node </entry> + <entry>Mount parent directory where devices are mounted </entry> + </row> + <row> + <entry>mount_check </entry> + <entry>true </entry> + <entry>Whether or not check if the devices are mounted to prevent accidentally writing to the root device </entry> + </row> + <row> + <entry>bind_ip </entry> + <entry>0.0.0.0 </entry> + <entry>IP Address for server to bind</entry> + </row> + <row> + <entry>bind_port </entry> + <entry>6000 </entry> + <entry>Port for server to bind</entry> + </row> + <row> + <entry>workers </entry> + <entry>1 </entry> + <entry>Number of workers to fork </entry> + </row> + </tbody> + </tgroup> + </table> + <table frame="all"> + <title>object-server.conf Server Options in the [object-server] section </title> + <tgroup cols="3"> + <colspec colname="c1"/> + <colspec colname="c2"/> + <colspec colname="c3"/> + <thead> + <row> + <entry>Option </entry> + <entry>Default </entry> + <entry>Description </entry> + </row> + </thead> + 
<tbody> + <row> + <entry>use </entry> + <entry/> + <entry>paste.deploy entry point for the object server. For most cases, this should be <literal>egg:swift#object</literal>. </entry> + </row> + <row> + <entry>log_name </entry> + <entry>object-server </entry> + <entry>log name used when logging </entry> + </row> + <row> + <entry>log_facility </entry> + <entry>LOG_LOCAL0 </entry> + <entry>Syslog log facility </entry> + </row> + <row> + <entry>log_level </entry> + <entry>INFO </entry> + <entry>Logging level </entry> + </row> + <row> + <entry>log_requests </entry> + <entry>True </entry> + <entry>Whether or not to log each request </entry> + </row> + <row> + <entry>user </entry> + <entry>swift </entry> + <entry>swift user</entry> + </row> + <row> + <entry>node_timeout </entry> + <entry>3</entry> + <entry>Request timeout to external services </entry> + </row> + <row> + <entry>conn_timeout </entry> + <entry>0.5</entry> + <entry>Connection timeout to external services </entry> + </row> + <row> + <entry>network_chunk_size </entry> + <entry>65536 </entry> + <entry>Size of chunks to read or write over the network </entry> + </row> + <row> + <entry>disk_chunk_size </entry> + <entry>65536 </entry> + <entry>Size of chunks to read or write to disk </entry> + </row> + <row> + <entry>max_upload_time </entry> + <entry>86400 </entry> + <entry>Maximum time (in seconds) allowed to upload an object </entry> + </row> + <row> + <entry>slow </entry> + <entry>0</entry> + <entry>If > 0, Minimum time in seconds for a <literal>PUT</literal> or <literal>DELETE</literal> request to complete </entry> + </row> + </tbody> + </tgroup> + </table> + </section> + <section> + <title>Configuring Container Server</title> + <para>The Container Server’s primary job is to handle listings of objects. The listing is done by querying the GlusterFS mount point with path. This query returns a list of all files and directories present under that container.
+</para> + <para>The configurable options pertaining to container server are stored in <filename>/etc/swift/container-server/1.conf</filename> file. The following is the sample <filename>container-server/1.conf</filename> file:</para> + <para><programlisting>[DEFAULT] +devices = /srv/1/node +mount_check = false +bind_port = 6011 +user = root +log_facility = LOG_LOCAL2 + +[pipeline:main] +pipeline = gluster container-server + +[app:container-server] +use = egg:swift#container + +[filter:gluster] +use = egg:swift#gluster + +[container-replicator] +[container-updater] +[container-auditor]</programlisting></para> + <para>The following are the configurable options:</para> + <table frame="all"> + <title>container-server.conf Default Options in the [DEFAULT] section </title> + <tgroup cols="3"> + <colspec colname="c1"/> + <colspec colname="c2"/> + <colspec colname="c3"/> + <thead> + <row> + <entry>Option </entry> + <entry>Default </entry> + <entry>Description </entry> + </row> + </thead> + <tbody> + <row> + <entry>swift_dir </entry> + <entry>/etc/swift </entry> + <entry>Swift configuration directory </entry> + </row> + <row> + <entry>devices </entry> + <entry>/srv/node </entry> + <entry>Mount parent directory where devices are mounted </entry> + </row> + <row> + <entry>mount_check </entry> + <entry>true </entry> + <entry>Whether or not check if the devices are mounted to prevent accidentally writing to the root device </entry> + </row> + <row> + <entry>bind_ip </entry> + <entry>0.0.0.0 </entry> + <entry>IP Address for server to bind</entry> + </row> + <row> + <entry>bind_port </entry> + <entry>6001 </entry> + <entry>Port for server to bind</entry> + </row> + <row> + <entry>workers </entry> + <entry>1 </entry> + <entry>Number of workers to fork </entry> + </row> + <row> + <entry>user </entry> + <entry>swift </entry> + <entry>Swift user</entry> + </row> + </tbody> + </tgroup> + </table> + <table frame="all"> + <title>container-server.conf Server Options in the 
[container-server] section </title> + <tgroup cols="3"> + <colspec colname="c1"/> + <colspec colname="c2"/> + <colspec colname="c3"/> + <thead> + <row> + <entry>Option </entry> + <entry>Default </entry> + <entry>Description </entry> + </row> + </thead> + <tbody> + <row> + <entry>use </entry> + <entry/> + <entry>paste.deploy entry point for the container server. For most cases, this should be <literal>egg:swift#container</literal>. </entry> + </row> + <row> + <entry>log_name </entry> + <entry>container-server </entry> + <entry>Label used when logging </entry> + </row> + <row> + <entry>log_facility </entry> + <entry>LOG_LOCAL0 </entry> + <entry>Syslog log facility </entry> + </row> + <row> + <entry>log_level </entry> + <entry>INFO </entry> + <entry>Logging level </entry> + </row> + <row> + <entry>node_timeout </entry> + <entry>3 </entry> + <entry>Request timeout to external services </entry> + </row> + <row> + <entry>conn_timeout </entry> + <entry>0.5 </entry> + <entry>Connection timeout to external services </entry> + </row> + </tbody> + </tgroup> + </table> + </section> + <section> + <title>Configuring Account Server</title> + <para>The Account Server is very similar to the Container Server, except that it is responsible for listing of containers rather than objects. In UFO, each gluster volume is an account. +</para> + <para>The configurable options pertaining to account server are stored in <filename>/etc/swift/account-server/1.conf</filename> file. 
The following is the sample <filename>account-server/1.conf</filename> file: </para> + <para><programlisting>[DEFAULT] +devices = /srv/1/node +mount_check = false +bind_port = 6012 +user = root +log_facility = LOG_LOCAL2 + +[pipeline:main] +pipeline = gluster account-server + +[app:account-server] +use = egg:swift#account + +[filter:gluster] +use = egg:swift#gluster + +[account-replicator] +vm_test_mode = yes + +[account-auditor] +[account-reaper]</programlisting></para> + <para>The following are the configurable options:</para> + <table frame="all"> + <title>account-server.conf Default Options in the [DEFAULT] section </title> + <tgroup cols="3"> + <colspec colname="c1"/> + <colspec colname="c2"/> + <colspec colname="c3"/> + <thead> + <row> + <entry>Option </entry> + <entry>Default </entry> + <entry>Description </entry> + </row> + </thead> + <tbody> + <row> + <entry>swift_dir </entry> + <entry>/etc/swift </entry> + <entry>Swift configuration directory </entry> + </row> + <row> + <entry>devices </entry> + <entry>/srv/node </entry> + <entry>mount parent directory where devices are mounted </entry> + </row> + <row> + <entry>mount_check </entry> + <entry>true </entry> + <entry>Whether or not check if the devices are mounted to prevent accidentally writing to the root device </entry> + </row> + <row> + <entry>bind_ip </entry> + <entry>0.0.0.0 </entry> + <entry>IP Address for server to bind</entry> + </row> + <row> + <entry>bind_port </entry> + <entry>6002 </entry> + <entry>Port for server to bind</entry> + </row> + <row> + <entry>workers </entry> + <entry>1 </entry> + <entry>Number of workers to fork </entry> + </row> + <row> + <entry>user </entry> + <entry>swift </entry> + <entry>Swift user</entry> + </row> + </tbody> + </tgroup> + </table> + <table frame="all"> + <title>account-server.conf Server Options in the [account-server] section </title> + <tgroup cols="3"> + <colspec colname="c1"/> + <colspec colname="c2"/> + <colspec colname="c3"/> + <thead> + <row> + 
<entry>Option </entry> + <entry>Default </entry> + <entry>Description </entry> + </row> + </thead> + <tbody> + <row> + <entry>use </entry> + <entry/> + <entry>paste.deploy entry point for the account server. For most cases, this should be <literal>egg:swift#account</literal>. </entry> + </row> + <row> + <entry>log_name </entry> + <entry>account-server </entry> + <entry>Label used when logging </entry> + </row> + <row> + <entry>log_facility </entry> + <entry>LOG_LOCAL0 </entry> + <entry>Syslog log facility </entry> + </row> + <row> + <entry>log_level </entry> + <entry>INFO </entry> + <entry>Logging level </entry> + </row> + </tbody> + </tgroup> + </table> + </section> + <section> + <title>Starting and Stopping Server</title> + <para>You must start the server manually when the system reboots and whenever you update/modify the configuration files.</para> + <itemizedlist> + <listitem> + <para>To start the server, enter the following command:</para> + <para><command># swift-init main start</command></para> + </listitem> + <listitem> + <para>To stop the server, enter the following command:</para> + <para><command># swift-init main stop</command></para> + </listitem> + </itemizedlist> + </section> + </section> + <section> + <title>Working with Unified File and Object Storage</title> + <para>This section describes the REST API for administering and managing Object Storage. All requests will +be directed to the host and URL described in the <filename>X-Storage-Url</filename> HTTP header obtained during +successful authentication. +</para> + <section> + <title>Configuring Authenticated Access </title> + <para>Authentication is the process of proving identity to the system. To use the REST interface, you must +obtain an authorization token using GET method and supply it with v1.0 as the path. +</para> + <para>Each REST request against the Object Storage system requires the addition of a specific authorization +token HTTP x-header, defined as X-Auth-Token.
The storage URL and authentication token are +returned in the headers of the response. +</para> + <itemizedlist> + <listitem> + <para>To authenticate, run the following command: +</para> + <programlisting>GET /auth/v1.0 HTTP/1.1 +Host: <auth URL> +X-Auth-User: <account name>:<user name> +X-Auth-Key: <user-Password></programlisting> + <para>For example, +</para> + <programlisting>GET /auth/v1.0 HTTP/1.1 +Host: auth.example.com +X-Auth-User: test:tester +X-Auth-Key: testing + +HTTP/1.1 200 OK +X-Storage-Url: https://example.storage.com:443/v1/AUTH_test +X-Storage-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554 +X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554 +Content-Length: 0 +Date: Wed, 10 Jul 2011 06:11:51 GMT</programlisting> + <para>To authenticate access using cURL (for the above example), run the following +command: +</para> + <programlisting>curl -v -H 'X-Storage-User: test:tester' -H 'X-Storage-Pass:testing' -k +https://auth.example.com:443/auth/v1.0</programlisting> + <para>The X-Storage-Url has to be parsed and used in the connection and request line of all subsequent +requests to the server. In the example output, users connecting to server will send most +container/object requests with a host header of example.storage.com and the request line's version +and account as v1/AUTH_test. + +</para> + </listitem> + </itemizedlist> + <note> + <para>The authentication tokens are valid for a 24 hour period. +</para> + </note> + </section> + <section> + <title>Working with Accounts </title> + <para>This section describes the list of operations you can perform at the account level of the URL. +</para> + <section> + <title>Displaying Container Information </title> + <para>You can list the objects of a specific container, or all containers, as needed using GET command.
You +can use the following optional parameters with GET request to refine the results: +</para> + <para><informaltable frame="none"> + <tgroup cols="2"> + <colspec colnum="1" colname="c0" colsep="0"/> + <colspec colnum="2" colname="c1" colsep="0"/> + <thead> + <row> + <entry>Parameter </entry> + <entry>Description </entry> + </row> + </thead> + <tbody> + <row> + <entry>limit </entry> + <entry>Limits the number of results to at most <emphasis role="italic">n</emphasis> value. </entry> + </row> + <row> + <entry>marker </entry> + <entry>Returns object names greater in value than the specified marker. </entry> + </row> + <row> + <entry>format </entry> + <entry>Specify either json or xml to return the respective serialized response. </entry> + </row> + </tbody> + </tgroup> + </informaltable></para> + <para><emphasis role="bold">To display container information </emphasis></para> + <itemizedlist> + <listitem> + <para>List all the containers of an account using the following command: +</para> + <para><programlisting>GET /<apiversion>/<account> HTTP/1.1 +Host: <storage URL> +X-Auth-Token: <authentication-token-key></programlisting></para> + <para>For example, +</para> + <programlisting>GET /v1/AUTH_test HTTP/1.1 +Host: example.storage.com +X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554 + +HTTP/1.1 200 Ok +Date: Wed, 13 Jul 2011 16:32:21 GMT +Server: Apache +Content-Type: text/plain; charset=UTF-8 +Content-Length: 39 + +songs +movies +documents +reports</programlisting> + </listitem> + </itemizedlist> + <para>To display container information using cURL (for the above example), run the following +command: +</para> + <para><programlisting>curl -v -X GET -H 'X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554' +https://example.storage.com:443/v1/AUTH_test -k</programlisting></para> + </section> + <section> + <title>Displaying Account Metadata Information </title> + <para>You can issue HEAD command to the storage service to view the number of containers and the total 
+bytes stored in the account. +</para> + <itemizedlist> + <listitem> + <para>To display containers and storage used, run the following command: +</para> + <programlisting>HEAD /<apiversion>/<account> HTTP/1.1 +Host: <storage URL> +X-Auth-Token: <authentication-token-key></programlisting> + <para>For example, +</para> + <programlisting>HEAD /v1/AUTH_test HTTP/1.1 +Host: example.storage.com +X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554 + +HTTP/1.1 204 No Content +Date: Wed, 13 Jul 2011 16:52:21 GMT +Server: Apache +X-Account-Container-Count: 4 +X-Account-Total-Bytes-Used: 394792</programlisting> + <para>To display account metadata information using cURL (for the above example), run the following +command: +</para> + <programlisting>curl -v -X HEAD -H 'X-Auth-Token: +AUTH_tkde3ad38b087b49bbbac0494f7600a554' +https://example.storage.com:443/v1/AUTH_test -k</programlisting> + </listitem> + </itemizedlist> + </section> + </section> + <section> + <title>Working with Containers </title> + <para>This section describes the list of operations you can perform at the container level of the URL. +</para> + <section> + <title> Creating Containers </title> + <para>You can use PUT command to create containers. Containers are the storage folders for your data. +The URL encoded name must be less than 256 bytes and cannot contain a forward slash '/' character. 
+</para> + <itemizedlist> + <listitem> + <para>To create a container, run the following command: +</para> + <programlisting>PUT /<apiversion>/<account>/<container>/ HTTP/1.1 +Host: <storage URL> +X-Auth-Token: <authentication-token-key></programlisting> + <para>For example, +</para> + <programlisting>PUT /v1/AUTH_test/pictures/ HTTP/1.1 +Host: example.storage.com +X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554 + +HTTP/1.1 201 Created +Date: Wed, 13 Jul 2011 17:32:21 GMT +Server: Apache +Content-Type: text/plain; charset=UTF-8</programlisting> + <para>To create a container using cURL (for the above example), run the following command: +</para> + <programlisting>curl -v -X PUT -H 'X-Auth-Token: +AUTH_tkde3ad38b087b49bbbac0494f7600a554' +https://example.storage.com:443/v1/AUTH_test/pictures -k</programlisting> + <para>The status code of 201 (Created) indicates that you have successfully created the container. If a +container with the same name already exists, the status code 202 (Accepted) is displayed. +</para> + </listitem> + </itemizedlist> + </section> + <section> + <title>Displaying Objects of a Container </title> + <para>You can list the objects of a container using GET command. You can use the following optional +parameters with GET request to refine the results: +</para> + <para><informaltable frame="none"> + <tgroup cols="2"> + <colspec colnum="1" colname="c0" colsep="0"/> + <colspec colnum="2" colname="c1" colsep="0"/> + <thead> + <row> + <entry>Parameter </entry> + <entry>Description </entry> + </row> + </thead> + <tbody> + <row> + <entry>limit </entry> + <entry>Limits the number of results to at most <emphasis role="italic">n</emphasis> value. </entry> + </row> + <row> + <entry>marker </entry> + <entry>Returns object names greater in value than the specified marker. </entry> + </row> + <row> + <entry>prefix </entry> + <entry>Displays the results limited to object names beginning with the substring x.
</entry> + </row> + <row> + <entry>path </entry> + <entry>Returns the object names nested in the pseudo path. </entry> + </row> + <row> + <entry>format </entry> + <entry>Specify either json or xml to return the respective serialized response. </entry> + </row> + <row> + <entry>delimiter </entry> + <entry>Returns all the object names nested in the container. </entry> + </row> + </tbody> + </tgroup> + </informaltable></para> + <para>To display objects of a container +</para> + <itemizedlist> + <listitem> + <para>List objects of a specific container using the following command: +</para> + </listitem> + </itemizedlist> + <programlisting>GET /<apiversion>/<account>/<container>[?parm=value] HTTP/1.1 +Host: <storage URL> +X-Auth-Token: <authentication-token-key></programlisting> + <para>For example, +</para> + <programlisting>GET /v1/AUTH_test/images HTTP/1.1 +Host: example.storage.com +X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554 + +HTTP/1.1 200 Ok +Date: Wed, 13 Jul 2011 15:42:21 GMT +Server: Apache +Content-Type: text/plain; charset=UTF-8 +Content-Length: 139 + +sample file.jpg +test-file.pdf +You and Me.pdf +Puddle of Mudd.mp3 +Test Reports.doc</programlisting> + <para>To display objects of a container using cURL (for the above example), run the following +command: +</para> + <programlisting>curl -v -X GET -H 'X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554' +https://example.storage.com:443/v1/AUTH_test/images -k</programlisting> + </section> + <section> + <title>Displaying Container Metadata Information </title> + <para>You can issue HEAD command to the storage service to view the number of objects in a container and +the total bytes of all the objects stored in the container.
+</para> + <itemizedlist> + <listitem> + <para>To display the number of objects and storage used, run the following command: +</para> + <programlisting>HEAD /<apiversion>/<account>/<container> HTTP/1.1 +Host: <storage URL> +X-Auth-Token: <authentication-token-key></programlisting> + <para>For example,</para> + <programlisting>HEAD /v1/AUTH_test/images HTTP/1.1 +Host: example.storage.com +X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554 + +HTTP/1.1 204 No Content +Date: Wed, 13 Jul 2011 19:52:21 GMT +Server: Apache +X-Container-Object-Count: 8 +X-Container-Bytes-Used: 472</programlisting> + <para>To display the number of objects and storage used in a container using cURL (for the above example), run +the following command: +</para> + <programlisting>curl -v -X HEAD -H 'X-Auth-Token: +AUTH_tkde3ad38b087b49bbbac0494f7600a554' +https://example.storage.com:443/v1/AUTH_test/images -k</programlisting> + </listitem> + </itemizedlist> + </section> + <section> + <title>Deleting Container </title> + <para>You can use DELETE command to permanently delete containers. The container must be empty +before it can be deleted. +</para> + <para>You can issue HEAD command to determine if it contains any objects.
+</para> + <itemizedlist> + <listitem> + <para>To delete a container, run the following command: +</para> + <programlisting>DELETE /<apiversion>/<account>/<container>/ HTTP/1.1 +Host: <storage URL> +X-Auth-Token: <authentication-token-key></programlisting> + <para>For example,</para> + <programlisting>DELETE /v1/AUTH_test/pictures HTTP/1.1 +Host: example.storage.com +X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554 + +HTTP/1.1 204 No Content +Date: Wed, 13 Jul 2011 17:52:21 GMT +Server: Apache +Content-Length: 0 +Content-Type: text/plain; charset=UTF-8</programlisting> + <para>To delete a container using cURL (for the above example), run the following command: +</para> + <programlisting>curl -v -X DELETE -H 'X-Auth-Token: +AUTH_tkde3ad38b087b49bbbac0494f7600a554' +https://example.storage.com:443/v1/AUTH_test/pictures -k</programlisting> + <para>The status code of 204 (No Content) indicates that you have successfully deleted the container. If +that container does not exist, the status code 404 (Not Found) is displayed, and if the container is +not empty, the status code 409 (Conflict) is displayed. +</para> + </listitem> + </itemizedlist> + </section> + <section> + <title>Updating Container Metadata </title> + <para>You can update the metadata of container using POST operation, metadata keys should be prefixed +with 'x-container-meta'. 
+</para> + <itemizedlist> + <listitem> + <para>To update the metadata of the container, run the following command: +</para> + <programlisting>POST /<apiversion>/<account>/<container> HTTP/1.1 +Host: <storage URL> +X-Auth-Token: <Authentication-token-key> +X-Container-Meta-<key>: <new value> +X-Container-Meta-<key>: <new value></programlisting> + <para>For example, +</para> + <para><programlisting>POST /v1/AUTH_test/images HTTP/1.1 +Host: example.storage.com +X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554 +X-Container-Meta-Zoo: Lion +X-Container-Meta-Home: Dog + +HTTP/1.1 204 No Content +Date: Wed, 13 Jul 2011 20:52:21 GMT +Server: Apache +Content-Type: text/plain; charset=UTF-8</programlisting></para> + <para>To update the metadata of the container using cURL (for the above example), run the following +command: +</para> + <para><programlisting>curl -v -X POST -H 'X-Auth-Token: +AUTH_tkde3ad38b087b49bbbac0494f7600a554' +https://example.storage.com:443/v1/AUTH_test/images -H 'X-Container-Meta-Zoo: Lion' -H 'X-Container-Meta-Home: Dog' -k</programlisting></para> + <para>The status code of 204 (No Content) indicates the container's metadata is updated successfully. If +that container does not exist, the status code 404 (Not Found) is displayed. +</para> + </listitem> + </itemizedlist> + </section> + <section id="chap-Administration_Guide-Working_UFO-Setting_ACLs"> + <title> Setting ACLs on Container </title> + <para>You can set the container access control list by using the POST command on a container with the <command>x-container-read</command> and <command>x-container-write</command> keys. +</para> + <para>The ACL format is <command>[item[,item...]]</command>. Each item can be a group name to give access to or a +referrer designation to grant or deny based on the HTTP Referer header. +</para> + <para>The referrer designation format is: <command>.r:[-]value</command>. 
+</para> + <para>The <command>.r</command> can also be <command>.ref</command>, <command>.referer</command>, or <command>.referrer</command>; though it will be shortened to <command>.r</command> for +decreased character count usage. The value can be <command>*</command> to specify any referrer host is allowed access. The leading minus sign (-) +indicates referrer hosts that should be denied access. +</para> + <para>Examples of valid ACLs: +</para> + <para><programlisting>.r:* +.r:*,bobs_account,sues_account:sue +bobs_account,sues_account:sue</programlisting></para> + <para>Examples of invalid ACLs:</para> + <para><programlisting>.r: +.r:-</programlisting></para> + <para>By default, allowing read access via <command>.r</command> will not allow listing objects in the container but allows +retrieving objects from the container. To turn on listings, use the <command>.rlistings</command> directive. Also, <command>.r</command> +designations are not allowed in headers whose names include the word write. +</para> + <para>For example, to set the access rights of all the objects inside the container to "public" using cURL (for the +above example), run the following command: +</para> + <para><programlisting>curl -v -X POST -H 'X-Auth-Token: +AUTH_tkde3ad38b087b49bbbac0494f7600a554' +https://example.storage.com:443/v1/AUTH_test/images +-H 'X-Container-Read: .r:*' -k</programlisting></para> + </section> + </section> + <section> + <title> Working with Objects </title> + <para>An object represents the data and any metadata for the files stored in the system. Through the REST +interface, metadata for an object can be included by adding custom HTTP headers to the request +and the data payload as the request body. Object names should not exceed 1024 bytes after URL +encoding. +</para> + <para>This section describes the list of operations you can perform at the object level of the URL. 
+</para> + <section> + <title>Creating or Updating Object </title> + <para>You can use the PUT command to write or update an object's content and metadata. +</para> + <para>You can verify the data integrity by including an MD5 checksum for the object's data in the ETag +header. The ETag header is optional and can be used to ensure that the object's contents are stored +successfully in the storage system. +</para> + <para>You can assign custom metadata to objects by including additional HTTP headers on the PUT request. +The objects created with custom metadata via HTTP headers are identified with the <command>X-Object-Meta-</command> prefix. +</para> + <itemizedlist> + <listitem> + <para>To create or update an object, run the following command: +</para> + <para><programlisting>PUT /<apiversion>/<account>/<container>/<object> HTTP/1.1 +Host: <storage URL> +X-Auth-Token: <authentication-token-key> +ETag: da1e100dc9e7becc810986e37875ae38 +Content-Length: 342909 +X-Object-Meta-PIN: 2343</programlisting></para> + <para>For example,</para> + <para><programlisting>PUT /v1/AUTH_test/pictures/dog HTTP/1.1 +Host: example.storage.com +X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554 +ETag: da1e100dc9e7becc810986e37875ae38 + +HTTP/1.1 201 Created +Date: Wed, 13 Jul 2011 18:32:21 GMT +Server: Apache +ETag: da1e100dc9e7becc810986e37875ae38 +Content-Length: 0 +Content-Type: text/plain; charset=UTF-8</programlisting></para> + <para>To create or update an object using cURL (for the above example), run the following command: +</para> + <para><programlisting>curl -v -X PUT -H 'X-Auth-Token: +AUTH_tkde3ad38b087b49bbbac0494f7600a554' +https://example.storage.com:443/v1/AUTH_test/pictures/dog +-H 'Content-Length: 0' -k</programlisting></para> + <para>The status code of 201 (Created) indicates that you have successfully created or updated the object. +If there is a missing Content-Length or Content-Type header in the request, the status code of 411 +(Length Required) is displayed. 
(Optionally) If the MD5 checksum of the data written to the storage +system does not match the ETag value, the status code of 422 (Unprocessable Entity) is displayed. +</para> + </listitem> + </itemizedlist> + <section> + <title>Chunked Transfer Encoding </title> + <para>You can upload data without knowing the size of the data to be uploaded. You can do this by +specifying an HTTP header of Transfer-Encoding: chunked and without using a Content-Length +header. +</para> + <para>You can use this feature while doing a DB dump, piping the output through gzip, and then piping the +data directly into Object Storage without having to buffer the data to disk to compute the file size. +</para> + <itemizedlist> + <listitem> + <para>To create or update an object, run the following command: + </para> + <para><programlisting>PUT /<apiversion>/<account>/<container>/<object> HTTP/1.1 +Host: <storage URL> +X-Auth-Token: <authentication-token-key> +Transfer-Encoding: chunked +X-Object-Meta-PIN: 2343</programlisting></para> + <para>For example, +</para> + <para><programlisting>PUT /v1/AUTH_test/pictures/cat HTTP/1.1 +Host: example.storage.com +X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554 +Transfer-Encoding: chunked +X-Object-Meta-PIN: 2343 +19 +A bunch of data broken up +D +into chunks. +0</programlisting> + +</para> + </listitem> + </itemizedlist> + </section> + </section> + <section> + <title>Copying Object </title> + <para>You can copy object from one container to another or add a new object and then add reference to +designate the source of the data from another container. 
+</para> + <para><emphasis role="bold">To copy an object from one container to another </emphasis></para> + <itemizedlist> + <listitem> + <para>To add a new object and designate the source of the data from another container, run the +following command: +</para> + <para><programlisting>COPY /<apiversion>/<account>/<container>/<sourceobject> HTTP/1.1 +Host: <storage URL> +X-Auth-Token: <authentication-token-key> +Destination: /<container>/<destinationobject></programlisting></para> + <para>For example, +</para> + <para><programlisting>COPY /v1/AUTH_test/images/dogs HTTP/1.1 +Host: example.storage.com +X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554 +Destination: /photos/cats + +HTTP/1.1 201 Created +Date: Wed, 13 Jul 2011 18:32:21 GMT +Server: Apache +Content-Length: 0 +Content-Type: text/plain; charset=UTF-8</programlisting></para> + <para>To copy an object using cURL (for the above example), run the following command: +</para> + <para><programlisting>curl -v -X COPY -H 'X-Auth-Token: +AUTH_tkde3ad38b087b49bbbac0494f7600a554' -H 'Destination: /photos/cats' -k https://example.storage.com:443/v1/AUTH_test/images/dogs</programlisting></para> + <para>The status code of 201 (Created) indicates that you have successfully copied the object. If there is a +missing Content-Length or Content-Type header in the request, the status code of 411 (Length +Required) is displayed. +</para> + <para>You can also use the PUT command to copy an object by using the additional header <command>X-Copy-From: container/obj</command>. 
+</para> + </listitem> + <listitem> + <para>To use the PUT command to copy an object, run the following command: +</para> + <para><programlisting>PUT /v1/AUTH_test/photos/cats HTTP/1.1 +Host: example.storage.com +X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554 +X-Copy-From: /images/dogs + +HTTP/1.1 201 Created +Date: Wed, 13 Jul 2011 18:32:21 GMT +Server: Apache +Content-Type: text/plain; charset=UTF-8</programlisting></para> + <para>To copy an object using cURL (for the above example), run the following command: +</para> + <para><programlisting>curl -v -X PUT -H 'X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554' +-H 'X-Copy-From: /images/dogs' -k +https://example.storage.com:443/v1/AUTH_test/photos/cats</programlisting></para> + <para>The status code of 201 (Created) indicates that you have successfully copied the object. +</para> + </listitem> + </itemizedlist> + </section> + <section> + <title>Displaying Object Information </title> + <para>You can issue the GET command on an object to view the data of the object. 
+</para> + <itemizedlist> + <listitem> + <para>To display the content of an object, run the following command:</para> + <para><programlisting>GET /<apiversion>/<account>/<container>/<object> HTTP/1.1 +Host: <storage URL> +X-Auth-Token: <Authentication-token-key></programlisting></para> + <para>For example, +</para> + <para><programlisting>GET /v1/AUTH_test/images/cat HTTP/1.1 +Host: example.storage.com +X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554 + +HTTP/1.1 200 Ok +Date: Wed, 13 Jul 2011 23:52:21 GMT +Server: Apache +Last-Modified: Thu, 14 Jul 2011 13:40:18 GMT +ETag: 8a964ee2a5e88be344f36c22562a6486 +Content-Length: 534210 +[.........]</programlisting></para> + <para>To display the content of an object using cURL (for the above example), run the following +command: +</para> + <para><programlisting>curl -v -X GET -H 'X-Auth-Token: +AUTH_tkde3ad38b087b49bbbac0494f7600a554' +https://example.storage.com:443/v1/AUTH_test/images/cat -k</programlisting></para> + <para>The status code of 200 (Ok) indicates the object's data is displayed successfully. If that object does +not exist, the status code 404 (Not Found) is displayed. +</para> + </listitem> + </itemizedlist> + </section> + <section> + <title>Displaying Object Metadata </title> + <para>You can issue the HEAD command on an object to view the object metadata and other standard HTTP +headers. You must send only the authorization token as a header. 
+</para> + <itemizedlist> + <listitem> + <para>To display the metadata of the object, run the following command: +</para> + </listitem> + </itemizedlist> + <para><programlisting>HEAD /<apiversion>/<account>/<container>/<object> HTTP/1.1 +Host: <storage URL> +X-Auth-Token: <Authentication-token-key></programlisting></para> + <para>For example, +</para> + <para><programlisting>HEAD /v1/AUTH_test/images/cat HTTP/1.1 +Host: example.storage.com +X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554 + +HTTP/1.1 204 No Content +Date: Wed, 13 Jul 2011 21:52:21 GMT +Server: Apache +Last-Modified: Thu, 14 Jul 2011 13:40:18 GMT +ETag: 8a964ee2a5e88be344f36c22562a6486 +Content-Length: 512000 +Content-Type: text/plain; charset=UTF-8 +X-Object-Meta-House: Cat +X-Object-Meta-Zoo: Cat +X-Object-Meta-Home: Cat +X-Object-Meta-Park: Cat</programlisting></para> + <para>To display the metadata of the object using cURL (for the above example), run the following +command: +</para> + <para><programlisting>curl -v -X HEAD -H 'X-Auth-Token: +AUTH_tkde3ad38b087b49bbbac0494f7600a554' +https://example.storage.com:443/v1/AUTH_test/images/cat -k</programlisting></para> + <para>The status code of 204 (No Content) indicates the object's metadata is displayed successfully. If that +object does not exist, the status code 404 (Not Found) is displayed. +</para> + </section> + <section> + <title>Updating Object Metadata </title> + <para>You can issue POST command on an object name only to set or overwrite arbitrary key metadata. You +cannot change the object's other headers such as Content-Type, ETag and others using POST +operation. The POST command will delete all the existing metadata and replace it with the new +arbitrary key metadata. +</para> + <para>You must prefix <emphasis role="bold">X-Object-Meta-</emphasis> to the key names. 
+</para> + <itemizedlist> + <listitem> + <para>To update the metadata of an object, run the following command:</para> + <para><programlisting>POST /<apiversion>/<account>/<container>/<object> HTTP/1.1 +Host: <storage URL> +X-Auth-Token: <Authentication-token-key> +X-Object-Meta-<key>: <new value> +X-Object-Meta-<key>: <new value></programlisting> +</para> + <para>For example, +</para> + <para><programlisting>POST /v1/AUTH_test/images/cat HTTP/1.1 +Host: example.storage.com +X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554 +X-Object-Meta-Zoo: Lion +X-Object-Meta-Home: Dog + +HTTP/1.1 202 Accepted +Date: Wed, 13 Jul 2011 22:52:21 GMT +Server: Apache +Content-Length: 0 +Content-Type: text/plain; charset=UTF-8</programlisting></para> + <para>To update the metadata of an object using cURL (for the above example), run the following +command: +</para> + <para><programlisting>curl -v -X POST -H 'X-Auth-Token: +AUTH_tkde3ad38b087b49bbbac0494f7600a554' +https://example.storage.com:443/v1/AUTH_test/images/cat +-H 'X-Object-Meta-Zoo: Lion' -H 'X-Object-Meta-Home: Dog' -k</programlisting></para> + <para>The status code of 202 (Accepted) indicates that you have successfully updated the object's +metadata. If that object does not exist, the status code 404 (Not Found) is displayed. + +</para> + </listitem> + </itemizedlist> + </section> + <section> + <title>Deleting Object </title> + <para>You can use the DELETE command to permanently delete the object. +</para> + <para>The DELETE command on an object will be processed immediately and any subsequent operations +like GET, HEAD, POST, or DELETE on the object will display a 404 (Not Found) error. 
+</para> + <itemizedlist> + <listitem> + <para>To delete an object, run the following command: +</para> + <para><programlisting>DELETE /<apiversion>/<account>/<container>/<object> HTTP/1.1 +Host: <storage URL> +X-Auth-Token: <Authentication-token-key></programlisting></para> + <para>For example, +</para> + <para><programlisting>DELETE /v1/AUTH_test/pictures/cat HTTP/1.1 +Host: example.storage.com +X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554 + +HTTP/1.1 204 No Content +Date: Wed, 13 Jul 2011 20:52:21 GMT +Server: Apache +Content-Type: text/plain; charset=UTF-8</programlisting></para> + <para>To delete an object using cURL (for the above example), run the following command: +</para> + <para><programlisting>curl -v -X DELETE -H 'X-Auth-Token: +AUTH_tkde3ad38b087b49bbbac0494f7600a554' +https://example.storage.com:443/v1/AUTH_test/pictures/cat -k</programlisting></para> + <para>The status code of 204 (No Content) indicates that you have successfully deleted the object. If that +object does not exist, the status code 404 (Not Found) is displayed. +</para> + </listitem> + </itemizedlist> + </section> + </section> + </section> +</chapter> diff --git a/doc/legacy/docbook/admin_commandref.xml b/doc/legacy/docbook/admin_commandref.xml new file mode 100644 index 00000000000..5e15605345f --- /dev/null +++ b/doc/legacy/docbook/admin_commandref.xml @@ -0,0 +1,334 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. 
--><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []> +<chapter id="chap-Administration_Guide-Com_Ref"> + <title>Command Reference </title> + <para>This section describes the available commands and includes the +following section: +</para> + <itemizedlist> + <listitem> + <para>gluster Command +</para> + <para>Gluster Console Manager (command line interpreter) +</para> + </listitem> + <listitem> + <para>glusterd Daemon +</para> + <para>Gluster elastic volume management daemon +</para> + </listitem> + </itemizedlist> + <section> + <title>gluster Command </title> + <para><emphasis role="bold">NAME</emphasis> +</para> + <para>gluster - Gluster Console Manager (command line interpreter) +</para> + <para><emphasis role="bold">SYNOPSIS</emphasis> +</para> + <para>To run the program and display the gluster prompt: +</para> + <para><emphasis role="bold">gluster</emphasis> +</para> + <para>To specify a command directly: +gluster [COMMANDS] [OPTIONS] +</para> + <para><emphasis role="bold">DESCRIPTION</emphasis> +</para> + <para>The Gluster Console Manager is a command line utility for elastic volume management. You can run +the gluster command on any export server. The command enables administrators to perform cloud +operations such as creating, expanding, shrinking, rebalancing, and migrating volumes without +needing to schedule server downtime. 
+</para> + <para><emphasis role="bold">COMMANDS</emphasis> +</para> + <para><informaltable frame="none"> + <tgroup cols="3"> + <colspec colnum="1" colname="c0" colsep="0"/> + <colspec colnum="2" colname="cgen1" colsep="0"/> + <colspec colnum="3" colname="c1" colsep="0"/> + <thead> + <row> + <entry>Command</entry> + <entry namest="cgen1" nameend="c1">Description</entry> + </row> + </thead> + <tbody> + <row> + <entry namest="c0" nameend="c1" align="left"> + <emphasis role="bold">Volume</emphasis> + </entry> + </row> + <row> + <entry>volume info [all | VOLNAME]</entry> + <entry namest="cgen1" nameend="c1">Displays information about all volumes, or the specified volume.</entry> + </row> + <row> + <entry>volume create NEW-VOLNAME [stripe COUNT] [replica COUNT] [transport tcp | rdma | tcp,rdma] NEW-BRICK ...</entry> + <entry namest="cgen1" nameend="c1">Creates a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).</entry> + </row> + <row> + <entry>volume delete VOLNAME</entry> + <entry namest="cgen1" nameend="c1">Deletes the specified volume.</entry> + </row> + <row> + <entry>volume start VOLNAME </entry> + <entry namest="cgen1" nameend="c1">Starts the specified volume.</entry> + </row> + <row> + <entry>volume stop VOLNAME [force] </entry> + <entry namest="cgen1" nameend="c1">Stops the specified volume. </entry> + </row> + <row> + <entry>volume rename VOLNAME NEW-VOLNAME </entry> + <entry namest="cgen1" nameend="c1">Renames the specified volume.</entry> + </row> + <row> + <entry>volume help </entry> + <entry namest="cgen1" nameend="c1">Displays help for the volume command.</entry> + </row> + <row> + <entry namest="c0" nameend="c1" align="left"> + <emphasis role="bold">Brick</emphasis> + </entry> + </row> + <row> + <entry>volume add-brick VOLNAME NEW-BRICK ... 
</entry> + <entry namest="cgen1" nameend="c1">Adds the specified brick to the specified volume.</entry> + </row> + <row> + <entry>volume replace-brick VOLNAME (BRICK NEW-BRICK) start | pause | abort | status </entry> + <entry namest="cgen1" nameend="c1">Replaces the specified brick.</entry> + </row> + <row> + <entry>volume remove-brick VOLNAME [(replica COUNT)|(stripe COUNT)] BRICK ... </entry> + <entry namest="cgen1" nameend="c1">Removes the specified brick from the specified volume.</entry> + </row> + <row> + <entry namest="c0" nameend="c1" align="left"> + <emphasis role="bold">Rebalance</emphasis> + </entry> + </row> + <row> + <entry>volume rebalance VOLNAME start</entry> + <entry namest="cgen1" nameend="c1">Starts rebalancing the specified volume.</entry> + </row> + <row> + <entry>volume rebalance VOLNAME stop </entry> + <entry namest="cgen1" nameend="c1">Stops rebalancing the specified volume. </entry> + </row> + <row> + <entry>volume rebalance VOLNAME status </entry> + <entry namest="cgen1" nameend="c1">Displays the rebalance status of the specified volume.</entry> + </row> + <row> + <entry namest="c0" nameend="c1" align="left"> + <emphasis role="bold">Log</emphasis> + </entry> + </row> + <row> + <entry>volume log filename VOLNAME [BRICK] DIRECTORY </entry> + <entry namest="cgen1" nameend="c1">Sets the log directory for the corresponding volume/brick. </entry> + </row> + <row> + <entry>volume log rotate VOLNAME [BRICK] </entry> + <entry namest="cgen1" nameend="c1">Rotates the log file for corresponding volume/brick.</entry> + </row> + <row> + <entry>volume log locate VOLNAME [BRICK] </entry> + <entry namest="cgen1" nameend="c1">Locates the log file for corresponding volume/brick. </entry> + </row> + <row> + <entry namest="c0" nameend="c1" align="left"> + <emphasis role="bold">Peer</emphasis> + </entry> + </row> + <row> + <entry>peer probe HOSTNAME </entry> + <entry namest="cgen1" nameend="c1">Probes the specified peer. 
</entry> + </row> + <row> + <entry>peer detach HOSTNAME </entry> + <entry namest="cgen1" nameend="c1">Detaches the specified peer. </entry> + </row> + <row> + <entry>peer status </entry> + <entry namest="cgen1" nameend="c1">Displays the status of peers. </entry> + </row> + <row> + <entry>peer help </entry> + <entry namest="cgen1" nameend="c1">Displays help for the peer command.</entry> + </row> + <row> + <entry namest="c0" nameend="c1" align="left"> + <emphasis role="bold">Geo-replication</emphasis> + </entry> + </row> + <row> + <entry>volume geo-replication MASTER SLAVE start</entry> + <entry namest="cgen1" nameend="c1"> + <para>Start geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME.</para> + <para>You can specify a local slave volume as :VOLUME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.</para> + </entry> + </row> + <row> + <entry>volume geo-replication MASTER SLAVE stop</entry> + <entry namest="cgen1" nameend="c1"> + <para>Stop geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME and a local master directory as /DIRECTORY/SUB-DIRECTORY.</para> + <para>You can specify a local slave volume as :VOLNAME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY. +</para> + </entry> + </row> + <row> + <entry morerows="10">volume geo-replication MASTER SLAVE config [options]</entry> + <entry/> + <entry>Configure geo-replication options between the hosts specified by MASTER and SLAVE. 
</entry> + </row> + <row> + <entry>gluster-command COMMAND</entry> + <entry>The path where the gluster command is installed.</entry> + </row> + <row> + <entry>gluster-log-level LOGFILELEVEL</entry> + <entry>The log level for gluster processes.</entry> + </row> + <row> + <entry>log-file LOGFILE</entry> + <entry>The path to the geo-replication log file.</entry> + </row> + <row> + <entry>log-level LOGFILELEVEL</entry> + <entry>The log level for geo-replication.</entry> + </row> + <row> + <entry>remote-gsyncd COMMAND</entry> + <entry>The path where the gsyncd binary is installed on the remote machine.</entry> + </row> + <row> + <entry>ssh-command COMMAND</entry> + <entry>The ssh command to use to connect to the remote machine (the default is ssh).</entry> + </row> + <row> + <entry>rsync-command COMMAND</entry> + <entry>The rsync command to use for synchronizing the files (the default is rsync).</entry> + </row> + <row> + <entry>volume_id= UID</entry> + <entry>The command to delete the existing master UID for the intermediate/slave node.</entry> + </row> + <row> + <entry>timeout SECONDS</entry> + <entry>The timeout period.</entry> + </row> + <row> + <entry>sync-jobs N</entry> + <entry>The number of simultaneous files/directories that can be synchronized.</entry> + </row> + <row> + <entry/> + <entry>ignore-deletes</entry> + <entry>If this option is set to 1, a file deleted on master will not trigger a delete operation on the slave. 
Hence, the slave will remain as a superset of the master and can be used to recover the master in case of crash and/or accidental delete.</entry> + </row> + <row> + <entry namest="c0" nameend="c1" align="left"> + <emphasis role="bold">Other</emphasis> + </entry> + </row> + <row> + <entry>help</entry> + <entry/> + <entry>Display the command options.</entry> + </row> + <row> + <entry>quit</entry> + <entry/> + <entry>Exit the gluster command line interface.</entry> + </row> + </tbody> + </tgroup> + </informaltable></para> + <para><emphasis role="bold">FILES</emphasis> + +</para> + <para>/var/lib/glusterd/* +</para> + <para><emphasis role="bold">SEE ALSO </emphasis></para> + <para>fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8), glusterd(8)</para> + </section> + <section> + <title>glusterd Daemon </title> + <para><emphasis role="bold">NAME</emphasis> +</para> + <para>glusterd - Gluster elastic volume management daemon</para> + <para><emphasis role="bold">SYNOPSIS</emphasis> +</para> + <para>glusterd [OPTION...] +</para> + <para><emphasis role="bold">DESCRIPTION</emphasis> +</para> + <para>The glusterd daemon is used for elastic volume management. The daemon must be run on all export servers. +</para> + <para><emphasis role="bold">OPTIONS</emphasis> +</para> + <para><informaltable frame="none"> + <tgroup cols="2"> + <colspec colnum="1" colname="c0" colsep="0"/> + <colspec colnum="2" colname="c1" colsep="0"/> + <thead> + <row> + <entry>Option</entry> + <entry>Description</entry> + </row> + </thead> + <tbody> + <row> + <entry namest="c0" nameend="c1" align="left"> + <emphasis role="bold">Basic</emphasis> + </entry> + </row> + <row> + <entry>-l=LOGFILE, --log-file=LOGFILE</entry> + <entry>Files to use for logging (the default is /usr/local/var/log/glusterfs/glusterfs.log).</entry> + </row> + <row> + <entry>-L=LOGLEVEL, --log-level=LOGLEVEL</entry> + <entry>Logging severity. 
Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO). </entry> + </row> + <row> + <entry>--debug</entry> + <entry>Runs the program in debug mode. This option sets --no-daemon, --log-level to DEBUG, and --log-file to console.</entry> + </row> + <row> + <entry>-N, --no-daemon</entry> + <entry>Runs the program in the foreground.</entry> + </row> + <row> + <entry namest="c0" nameend="c1" align="left"> + <emphasis role="bold">Miscellaneous</emphasis> + </entry> + </row> + <row> + <entry>-?, --help</entry> + <entry>Displays this help.</entry> + </row> + <row> + <entry>--usage</entry> + <entry>Displays a short usage message.</entry> + </row> + <row> + <entry>-V, --version</entry> + <entry>Prints the program version.</entry> + </row> + </tbody> + </tgroup> + </informaltable></para> + <para><emphasis role="bold">FILES</emphasis> + +</para> + <para>/var/lib/glusterd/* +</para> + <para><emphasis role="bold">SEE ALSO </emphasis></para> + <para>fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8), gluster(8)</para> + </section> +</chapter> diff --git a/doc/legacy/docbook/admin_console.xml b/doc/legacy/docbook/admin_console.xml new file mode 100644 index 00000000000..ebf273935ca --- /dev/null +++ b/doc/legacy/docbook/admin_console.xml @@ -0,0 +1,28 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []> +<chapter> + <title>Using the Gluster Console Manager – Command Line Utility</title> + <para>The Gluster Console Manager is a single command line utility that simplifies configuration and management of your storage environment. The Gluster Console Manager is similar to the LVM (Logical Volume Manager) CLI or ZFS Command Line Interface, but across multiple storage servers. You can use the Gluster Console Manager online, while volumes are mounted and active. 
Gluster automatically synchronizes volume configuration information across all Gluster servers.</para> + <para>Using the Gluster Console Manager, you can create new volumes, start volumes, and stop volumes, as required. You can also add bricks to volumes, remove bricks from existing volumes, as well as change translator settings, among other operations.</para> + <para>You can also use the commands to create scripts for automation, as well as use the commands as an API to allow integration with third-party applications. </para> + <para><emphasis role="bold">Running the Gluster Console Manager</emphasis></para> + <para>You can run the Gluster Console Manager on any GlusterFS server either by invoking the commands or by running the Gluster CLI in interactive mode. You can also use the gluster command remotely using SSH. </para> + <itemizedlist> + <listitem> + <para>To run commands directly: </para> + <para><command> # gluster peer <replaceable>command</replaceable></command></para> + <para>For example:</para> + <para><command> # gluster peer status </command></para> + </listitem> + <listitem> + <para>To run the Gluster Console Manager in interactive mode </para> + <para><command># gluster</command></para> + <para>You can execute gluster commands from the Console Manager prompt:</para> + <para><command> gluster> <replaceable>command</replaceable></command> </para> + <para>For example, to view the status of the peer server:</para> + <para># <command>gluster </command></para> + <para><command>gluster > peer status </command></para> + <para>Display the status of the peer.</para> + </listitem> + </itemizedlist> +</chapter> diff --git a/doc/legacy/docbook/admin_directory_Quota.xml b/doc/legacy/docbook/admin_directory_Quota.xml new file mode 100644 index 00000000000..8a1012a6ac2 --- /dev/null +++ b/doc/legacy/docbook/admin_directory_Quota.xml @@ -0,0 +1,179 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. 
--><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []> +<chapter id="chap-Administration_Guide-Dir_Quota"> + <title>Managing Directory Quota </title> + <para>Directory quotas in GlusterFS allow you to set limits on usage of disk space by directories or volumes. +The storage administrators can control the disk space utilization at the directory and/or volume +levels in GlusterFS by setting limits to allocatable disk space at any level in the volume and directory +hierarchy. This is particularly useful in cloud deployments to facilitate utility billing model. + </para> + <para> <note> + <para>For now, only Hard limit is supported. Here, the limit cannot be exceeded and attempts to use +more disk space or inodes beyond the set limit will be denied. +</para> + </note></para> + <para>System administrators can also monitor the resource utilization to limit the storage for the users +depending on their role in the organization. +</para> + <para>You can set the quota at the following levels: + </para> + <itemizedlist> + <listitem> + <para>Directory level – limits the usage at the directory level + </para> + </listitem> + <listitem> + <para>Volume level – limits the usage at the volume level + </para> + </listitem> + </itemizedlist> + <note> + <para>You can set the disk limit on the directory even if it is not created. The disk limit is enforced +immediately after creating that directory. For more information on setting disk limit, see <xref linkend="chap-Administration_Guide-Dir_Quota-Set_Replace"/>. +</para> + </note> + <section id="chap-Administration_Guide-Dir_Quota-Enable"> + <title>Enabling Quota </title> + <para>You must enable Quota to set disk limits. 
+</para> + <para><emphasis role="bold">To enable quota</emphasis> +</para> + <itemizedlist> + <listitem> + <para>Enable the quota using the following command: +</para> + <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> enable </command></para> + <para>For example, to enable quota on test-volume: +</para> + <programlisting># gluster volume quota test-volume enable +Quota is enabled on /test-volume</programlisting> + </listitem> + </itemizedlist> + </section> + <section id="chap-Administration_Guide-Dir_Quota-Disable"> + <title>Disabling Quota </title> + <para>You can disable Quota, if needed. +</para> + <para><emphasis role="bold">To disable quota:</emphasis> +</para> + <itemizedlist> + <listitem> + <para>Disable the quota using the following command: +</para> + <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> disable </command></para> + <para>For example, to disable quota translator on test-volume: +</para> + <programlisting># gluster volume quota test-volume disable +Quota translator is disabled on /test-volume</programlisting> + </listitem> + </itemizedlist> + </section> + <section id="chap-Administration_Guide-Dir_Quota-Set_Replace"> + <title>Setting or Replacing Disk Limit </title> + <para>You can create new directories in your storage environment and set the disk limit or set disk limit for +the existing directories. The directory name should be relative to the volume with the export +directory/mount being treated as "/". 
+</para> + <para><emphasis role="bold">To set or replace disk limit</emphasis> +</para> + <itemizedlist> + <listitem> + <para>Set the disk limit using the following command: +</para> + <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> limit-usage /<replaceable>directory</replaceable> <replaceable>limit-value</replaceable></command></para> + <para>For example, to set a limit on the data directory on test-volume where data is a directory under the +export directory: +</para> + <programlisting># gluster volume quota test-volume limit-usage /data 10GB +Usage limit has been set on /data</programlisting> + <para><note> + <para>In a multi-level directory hierarchy, the strictest disk limit will be considered for enforcement. +</para> + </note></para> + </listitem> + </itemizedlist> + </section> + <section id="chap-Administration_Guide-Dir_Quota-Display"> + <title>Displaying Disk Limit Information </title> + <para>You can display disk limit information on all the directories on which the limit is set. 
+</para> + <para><emphasis role="bold">To display disk limit information</emphasis> +</para> + <itemizedlist> + <listitem> + <para>Display disk limit information of all the directories on which limit is set, using the following +command: +</para> + <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> list</command> +</para> + <para>For example, to see the set disks limit on test-volume: +</para> + <programlisting># gluster volume quota test-volume list + +<emphasis role="underline"> + <emphasis role="underline"><emphasis role="underline">Path</emphasis>__________Limit______Set Size</emphasis> + </emphasis> +/Test/data 10 GB 6 GB +/Test/data1 10 GB 4 GB</programlisting> + </listitem> + <listitem> + <para>Display disk limit information on a particular directory on which limit is set, using the following +command: +</para> + <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> list <replaceable>/directory name</replaceable></command> +</para> + <para>For example, to see the set limit on /data directory of test-volume:</para> + <programlisting># gluster volume quota test-volume list /data + +<emphasis role="underline"><emphasis role="underline">Path</emphasis>__________Limit______Set Size</emphasis> +/Test/data 10 GB 6 GB</programlisting> + </listitem> + </itemizedlist> + </section> + <section id="chap-Administration_Guide-Dir_Quota-Update"> + <title> Updating Memory Cache Size </title> + <para>For performance reasons, quota caches the directory sizes on client. You can set timeout indicating +the maximum valid duration of directory sizes in cache, from the time they are populated. +</para> + <para>For example: If there are multiple clients writing to a single directory, there are chances that some +other client might write till the quota limit is exceeded. However, this new file-size may not get +reflected in the client till size entry in cache has become stale because of timeout. 
If writes happen +on this client during this duration, they are allowed even though they would lead to exceeding of +quota-limits, since size in cache is not in sync with the actual size. When timeout happens, the size +in cache is updated from servers and will be in sync and no further writes will be allowed. A timeout +of zero will force fetching of directory sizes from server for every operation that modifies file data +and will effectively disable directory size caching on client side. +</para> + <para><emphasis role="bold">To update the memory cache size</emphasis> +</para> + <itemizedlist> + <listitem> + <para>Update the memory cache size using the following command: +</para> + <para><command># gluster volume set <replaceable>VOLNAME</replaceable> features.quota-timeout<replaceable> value</replaceable></command></para> + <para>For example, to update the memory cache size for every 5 seconds on test-volume: +</para> + <programlisting># gluster volume set test-volume features.quota-timeout 5 +Set volume successful</programlisting> + </listitem> + </itemizedlist> + </section> + <section id="chap-Administration_Guide-Dir_Quota-Remove"> + <title>Removing Disk Limit </title> + <para>You can remove a disk limit that has been set, if you no longer want the quota. 
+</para> + <para><emphasis role="bold">To remove disk limit </emphasis></para> + <itemizedlist> + <listitem> + <para>Remove disk limit set on a particular directory using the following command: +</para> + <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> remove <replaceable>/directory name</replaceable></command> +</para> + <para>For example, to remove the disk limit on /data directory of test-volume: +</para> + <programlisting># gluster volume quota test-volume remove /data +Usage limit set on /data is removed</programlisting> + </listitem> + </itemizedlist> + </section> +</chapter> diff --git a/doc/legacy/docbook/admin_geo-replication.xml b/doc/legacy/docbook/admin_geo-replication.xml new file mode 100644 index 00000000000..279e9a62c97 --- /dev/null +++ b/doc/legacy/docbook/admin_geo-replication.xml @@ -0,0 +1,732 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []> +<chapter id="chap-Administration_Guide-Geo_Rep"> + <title>Managing Geo-replication</title> + <para>Geo-replication provides a continuous, asynchronous, and incremental replication service from one site to another over Local Area Networks (LANs), Wide Area Network (WANs), and across the Internet. </para> + <para>Geo-replication uses a master–slave model, whereby replication and mirroring occurs between the following partners:</para> + <itemizedlist> + <listitem> + <para>Master – a GlusterFS volume </para> + </listitem> + <listitem> + <para>Slave – a slave which can be of the following types: </para> + <itemizedlist> + <listitem> + <para>A local directory which can be represented as file URL like <filename>file:///path/to/dir</filename>. 
You can use the shortened form, for example, <filename> /path/to/dir</filename>.</para> + </listitem> + <listitem> + <para>A GlusterFS Volume - Slave volume can be either a local volume like <filename>gluster://localhost:volname</filename> (shortened form - <filename>:volname</filename>) or a volume served by a different host like <filename>gluster://host:volname</filename> (shortened form - <filename>host:volname</filename>).</para> + </listitem> + </itemizedlist> + <note> + <para> Both of the above types can be accessed remotely using an SSH tunnel. To use SSH, add an SSH prefix to either a file URL or gluster type URL. For example, <literal> ssh://root@remote-host:/path/to/dir</literal> (shortened form - <literal>root@remote-host:/path/to/dir</literal>) or <literal>ssh://root@remote-host:gluster://localhost:volname</literal> (shortened form - <literal>root@remote-host::volname</literal>). </para> + </note> + </listitem> + </itemizedlist> + <para>This section introduces Geo-replication, illustrates the various deployment scenarios, and explains how to configure the system to provide replication and mirroring in your environment. 
</para> + <section id="chap-Administration_Guide-Geo_Rep-Replicated_volumes"> + <title>Replicated Volumes vs Geo-replication</title> + <para>The following table lists the difference between replicated volumes and geo-replication:</para> + <informaltable frame="all"> + <tgroup cols="2"> + <colspec colname="c1"/> + <colspec colname="c2"/> + <thead> + <row> + <entry>Replicated Volumes</entry> + <entry>Geo-replication</entry> + </row> + </thead> + <tbody> + <row> + <entry>Mirrors data across clusters</entry> + <entry>Mirrors data across geographically distributed clusters </entry> + </row> + <row> + <entry>Provides high-availability</entry> + <entry>Ensures backing up of data for disaster recovery</entry> + </row> + <row> + <entry>Synchronous replication (each and every file operation is sent across all the bricks)</entry> + <entry>Asynchronous replication (checks for the changes in files periodically and syncs them on detecting differences) </entry> + </row> + </tbody> + </tgroup> + </informaltable> + </section> + <section id="chap-Administration_Guide-Geo_Rep-Preparation"> + <title>Preparing to Deploy Geo-replication</title> + <para>This section provides an overview of the Geo-replication deployment scenarios, describes how you can check the minimum system requirements, and explores common deployment scenarios.</para> + <itemizedlist> + <listitem> + <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_options"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_Overview"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Minimum_Reqs"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Environment"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Slave"/></para> + </listitem> + </itemizedlist> + 
<section id="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_options"> + <title>Exploring Geo-replication Deployment Scenarios</title> + <para>Geo-replication provides an incremental replication service over Local Area Networks (LANs), Wide Area Network (WANs), and across the Internet. This section illustrates the most common deployment scenarios for Geo-replication, including the following: </para> + <itemizedlist> + <listitem> + <para>Geo-replication over LAN +</para> + </listitem> + <listitem> + <para>Geo-replication over WAN +</para> + </listitem> + <listitem> + <para>Geo-replication over the Internet</para> + </listitem> + <listitem> + <para>Multi-site cascading Geo-replication</para> + </listitem> + </itemizedlist> + <para><emphasis role="bold">Geo-replication over LAN</emphasis></para> + <para>You can configure Geo-replication to mirror data over a Local Area Network. </para> + <mediaobject> + <textobject> + <phrase>Geo-replication over LAN</phrase> + </textobject> + <imageobject> + <imagedata fileref="images/Geo-Rep_LAN.png"/> + </imageobject> + </mediaobject> + <para><emphasis role="bold">Geo-replication over WAN</emphasis></para> + <para>You can configure Geo-replication to replicate data over a Wide Area Network.</para> + <mediaobject> + <textobject> + <phrase> + <phrase>Geo-replication over WAN</phrase> + </phrase> + </textobject> + <imageobject> + <imagedata fileref="images/Geo-Rep_WAN.png"/> + </imageobject> + </mediaobject> + <para><emphasis role="bold">Geo-replication over Internet</emphasis></para> + <para>You can configure Geo-replication to mirror data over the Internet.</para> + <mediaobject> + <textobject> + <phrase> + <phrase>Geo-replication over Internet</phrase> + </phrase> + </textobject> + <imageobject> + <imagedata fileref="images/Geo-Rep03_Internet.png"/> + </imageobject> + </mediaobject> + <para><emphasis role="bold">Multi-site cascading Geo-replication</emphasis> </para> + <para>You can configure Geo-replication to mirror data 
in a cascading fashion across multiple sites. </para> + <mediaobject> + <textobject> + <phrase> + <phrase>Multi-site cascading Geo-replication </phrase> + </phrase> + </textobject> + <imageobject> + <imagedata fileref="images/Geo-Rep04_Cascading.png"/> + </imageobject> + </mediaobject> + </section> + <section id="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_Overview"> + <title>Geo-replication Deployment Overview</title> + <para>Deploying Geo-replication involves the following steps:</para> + <orderedlist> + <listitem> + <para>Verify that your environment matches the minimum system requirement. For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Minimum_Reqs"/>.</para> + </listitem> + <listitem> + <para>Determine the appropriate deployment scenario. For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_options"/>.</para> + </listitem> + <listitem> + <para>Start Geo-replication on master and slave systems, as required. For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Starting"/>.</para> + </listitem> + </orderedlist> + </section> + <section id="chap-Administration_Guide-Geo_Rep-Preparation-Minimum_Reqs"> + <title>Checking Geo-replication Minimum Requirements</title> + <para condition="gfs">Before deploying GlusterFS Geo-replication, verify that your systems match the minimum requirements. 
</para> + <para condition="gfs">The following table outlines the minimum requirements for both master and slave nodes within your environment:</para> + <informaltable frame="all" condition="gfs"> + <tgroup cols="3"> + <colspec colname="c1"/> + <colspec colname="c2"/> + <colspec colname="c3"/> + <thead> + <row> + <entry>Component</entry> + <entry>Master</entry> + <entry>Slave</entry> + </row> + </thead> + <tbody> + <row> + <entry>Operating System </entry> + <entry>GNU/Linux</entry> + <entry>GNU/Linux</entry> + </row> + <row> + <entry>Filesystem</entry> + <entry>GlusterFS 3.2 or higher</entry> + <entry>GlusterFS 3.2 or higher (GlusterFS needs to be installed, but does not need to be running), ext3, ext4, or XFS (any other POSIX compliant file system would work, but has not been tested extensively) </entry> + </row> + <row> + <entry>Python </entry> + <entry>Python 2.4 (with ctypes external module), or Python 2.5 (or higher)</entry> + <entry>Python 2.4 (with ctypes external module), or Python 2.5 (or higher)</entry> + </row> + <row> + <entry>Secure shell </entry> + <entry>OpenSSH version 4.0 (or higher)</entry> + <entry>SSH2-compliant daemon </entry> + </row> + <row> + <entry>Remote synchronization</entry> + <entry>rsync 3.0.7 or higher </entry> + <entry>rsync 3.0.7 or higher </entry> + </row> + <row> + <entry>FUSE </entry> + <entry>GlusterFS supported versions </entry> + <entry>GlusterFS supported versions </entry> + </row> + </tbody> + </tgroup> + </informaltable> + </section> + <section id="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Environment"> + <title>Setting Up the Environment for Geo-replication</title> + <para><emphasis role="bold">Time Synchronization</emphasis> </para> + <itemizedlist> + <listitem> + <para>On bricks of a geo-replication master volume, all the servers' time must be uniform. 
You are recommended to set up NTP (Network Time Protocol) service to keep the bricks sync in time and avoid out-of-time sync effect.</para> + <para>For example: In a Replicated volume where brick1 of the master is at 12.20 hrs and brick 2 of the master is at 12.10 hrs with 10 minutes time lag, all the changes in brick2 between this period may go unnoticed during synchronization of files with Slave.</para> + <para>For more information on setting up NTP, see <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Enterprise_Linux/6/html/Migration_Planning_Guide/ch04s07.html"/>.</para> + </listitem> + </itemizedlist> + <para><emphasis role="bold">To setup Geo-replication for SSH </emphasis></para> + <para>Password-less login has to be set up between the host machine (where geo-replication Start command will be issued) and the remote machine (where slave process should be launched through SSH).</para> + <orderedlist> + <listitem> + <para>On the node where geo-replication sessions are to be set up, run the following command:</para> + <para><command># ssh-keygen -f /var/lib/glusterd/geo-replication/secret.pem</command> +</para> + <para>Press Enter twice to avoid passphrase. +</para> + </listitem> + <listitem> + <para>Run the following command on master for all the slave hosts: </para> + <para><command># ssh-copy-id -i /var/lib/glusterd/geo-replication/secret.pem.pub <varname>user</varname>@<varname>slavehost</varname></command></para> + </listitem> + </orderedlist> + </section> + <section id="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Slave"> + <title>Setting Up the Environment for a Secure Geo-replication Slave</title> + <para>You can configure a secure slave using SSH so that master is granted a +restricted access. With GlusterFS, you need not specify +configuration parameters regarding the slave on the master-side +configuration. 
For example, the master does not require the location of +the rsync program on slave but the slave must ensure that rsync is in +the PATH of the user that the master connects as using SSH. The only +information that master and slave have to negotiate are the slave-side +user account, slave's resources that master uses as slave resources, and +the master's public key. Secure access to the slave can be established +using the following options:</para> + <itemizedlist> + <listitem> + <para>Restricting Remote Command Execution</para> + </listitem> + <listitem> + <para>Using <filename>Mountbroker</filename> for Slaves</para> + </listitem> + <listitem> + <para>Using IP based Access Control</para> + </listitem> + </itemizedlist> + <para><emphasis role="bold">Backward Compatibility</emphasis> </para> + <para>Your existing Geo-replication environment will work with GlusterFS, +except for the following:</para> + <itemizedlist> + <listitem> + <para>The process of secure reconfiguration affects only the glusterfs +instance on slave. 
The changes are transparent to master with the +exception that you may have to change the SSH target to an unprivileged + account on slave.</para> + </listitem> + <listitem> + <para>The following are some exceptions where this might not work:</para> + <para><itemizedlist> + <listitem> + <para>Geo-replication URLs which specify the slave resource when configuring master will include the following special characters: space, *, ?, [;</para> + </listitem> + <listitem> + <para>Slave must have a running instance of glusterd, even if there is no +gluster volume among the mounted slave resources (that is, file tree +slaves are used exclusively).</para> + </listitem> + </itemizedlist></para> + </listitem> + </itemizedlist> + <section> + <title>Restricting Remote Command Execution</title> + <para>If you restrict remote command execution, then the Slave audits commands +coming from the master and only the commands related to the given +geo-replication session are allowed. The Slave also provides access only +to the files within the slave resource which can be read or manipulated +by the Master.</para> + <para>To restrict remote command execution:</para> + <orderedlist> + <listitem> + <para>Identify the location of the gsyncd helper utility on Slave. This utility is installed in <filename>PREFIX/libexec/glusterfs/gsyncd</filename>, where PREFIX is a compile-time parameter of glusterfs. For example, <filename>--prefix=PREFIX</filename> to the configure script with the following common values<filename> /usr, /usr/local, and /opt/glusterfs/glusterfs_version</filename>.</para> + </listitem> + <listitem> + <para>Ensure that commands invoked from master to slave pass through the slave's gsyncd utility. </para> + <para>You can use either of the following two options:</para> + <itemizedlist> + <listitem> + <para>Set gsyncd with an absolute path as the shell for the account +which the master connects through SSH. 
If you need to use a privileged +account, then set it up by creating a new user with UID 0. </para> + </listitem> + <listitem> + <para>Setup key authentication with command enforcement to gsyncd. You must prefix the copy of master's public key in the Slave account's <filename>authorized_keys</filename> file with the following command:</para> + <para><filename>command=<path to gsyncd></filename>. </para> + <para>For example, <command>command="PREFIX/glusterfs/gsyncd" ssh-rsa AAAAB3Nza....</command></para> + </listitem> + </itemizedlist> + </listitem> + </orderedlist> + </section> + <section> + <title>Using Mountbroker for Slaves </title> + <para><filename>mountbroker</filename> is a new service of glusterd. This service allows an +unprivileged process to own a GlusterFS mount by registering a label +(and DSL (Domain-specific language) options ) with glusterd through a +glusterd volfile. Using CLI, you can send a mount request to glusterd to +receive an alias (symlink) of the mounted volume.</para> + <para>A request from the agent , the unprivileged slave agents use the +mountbroker service of glusterd to set up an auxiliary gluster mount for +the agent in a special environment which ensures that the agent is only +allowed to access with special parameters that provide administrative +level access to the particular volume.</para> + <para><emphasis role="bold">To setup an auxiliary gluster mount for the agent</emphasis>:</para> + <orderedlist> + <listitem> + <para>Create a new group. For example, <filename>geogroup</filename>.</para> + </listitem> + <listitem> + <para>Create a unprivileged account. For example, <filename> geoaccount</filename>. 
Make it a member of <filename> geogroup</filename>.</para> + </listitem> + <listitem> + <para>Create a new directory owned by root and with permissions <emphasis role="italic">0711.</emphasis> For example, create a mountbroker-root directory <filename>/var/mountbroker-root</filename>.</para> + </listitem> + <listitem> + <para>Add the following options to the glusterd volfile, assuming the name of the slave gluster volume as <filename>slavevol</filename>:</para> + <para><command>option mountbroker-root /var/mountbroker-root </command></para> + <para><command>option mountbroker-geo-replication.geoaccount slavevol</command></para> + <para><command>option geo-replication-log-group geogroup</command></para> + <para>If you are unable to locate the glusterd volfile at <filename>/etc/glusterfs/glusterd.vol</filename>, you can create a volfile containing both the default configuration and the above options and place it at <filename>/etc/glusterfs/</filename>. </para> + <para>A sample glusterd volfile along with default options:</para> + <para><screen>volume management + type mgmt/glusterd + option working-directory /var/lib/glusterd + option transport-type socket,rdma + option transport.socket.keepalive-time 10 + option transport.socket.keepalive-interval 2 + option transport.socket.read-fail-log off + + option mountbroker-root /var/mountbroker-root + option mountbroker-geo-replication.geoaccount slavevol + option geo-replication-log-group geogroup +end-volume</screen></para> + <para>If you host multiple slave volumes on Slave, you can repeat step 2. 
for each of them and add the following options to the <filename>volfile</filename>:</para> + <para><screen>option mountbroker-geo-replication.geoaccount2 slavevol2 +option mountbroker-geo-replication.geoaccount3 slavevol3</screen></para> + </listitem> + <listitem> + <para>Setup Master to access Slave as <filename>geoaccount@Slave</filename>.</para> + <para>You can add multiple slave volumes within the same account (geoaccount) by providing comma-separated list (without spaces) as the argument of <command>mountbroker-geo-replication.geogroup</command>. You can also have multiple options of the form <command>mountbroker-geo-replication.*</command>. It is recommended to use one service account per Master machine. For example, if there are multiple slave volumes on Slave for the master machines Master1, Master2, and Master3, then create a dedicated service user on Slave for them by repeating Step 2. for each (like geogroup1, geogroup2, and geogroup3), and then add the following corresponding options to the volfile: +</para> + <para><command>option mountbroker-geo-replication.geoaccount1 slavevol11,slavevol12,slavevol13</command></para> + <para><command>option mountbroker-geo-replication.geoaccount2 slavevol21,slavevol22</command></para> + <para><command>option mountbroker-geo-replication.geoaccount3 slavevol31</command></para> + <para> +Now set up Master1 to ssh to geoaccount1@Slave, etc. +</para> + <para>You must restart glusterd after making changes in the configuration to effect the updates. </para> + </listitem> + </orderedlist> + </section> + <section> + <title>Using IP based Access Control</title> + <para>You can use IP based access control method to provide access control for +the slave resources using IP address. 
You can use this method for both gluster slaves +and file tree slaves, but in this section, we are focusing on file tree +slaves using this method.</para> + <para>To set access control based on IP address for file tree slaves:</para> + <orderedlist> + <listitem> + <para>Set a general restriction for accessibility of file tree resources: +</para> + <para><command># gluster volume geo-replication '/*' config allow-network ::1,127.0.0.1 </command></para> + <para>This will refuse all requests for spawning slave agents except for +requests initiated locally.</para> + </listitem> + <listitem> + <para>If you want to lease the file tree at <filename>/data/slave-tree</filename> to Master, enter the following command:</para> + <para><command># gluster volume geo-replication<varname> /data/slave-tree </varname>config allow-network <varname>MasterIP</varname></command></para> + <para><varname>MasterIP</varname> is the IP address of Master. The slave agent spawn request from +master will be accepted if it is executed at <filename>/data/slave-tree</filename>.</para> + </listitem> + </orderedlist> + <para>If the Master side network configuration does not enable the Slave to +recognize the exact IP address of Master, you can use CIDR notation to +specify a subnet instead of a single IP address as MasterIP or even +comma-separated lists of CIDR subnets.</para> + <para>If you want to extend IP based access control to gluster slaves, use the following command:</para> + <para><command># gluster volume geo-replication '*' config allow-network ::1,127.0.0.1</command></para> + </section> + </section> + </section> + <section id="chap-Administration_Guide-Geo_Rep-Starting"> + <title>Starting Geo-replication</title> + <para>This section describes how to configure and start Gluster Geo-replication in your storage environment, and verify that it is functioning correctly. 
</para> + <itemizedlist> + <listitem> + <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Start"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Verify"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Display"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Configure"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Stop"/></para> + </listitem> + </itemizedlist> + <section id="chap-Administration_Guide-Geo_Rep-Starting-Start"> + <title>Starting Geo-replication</title> + <para>To start Gluster Geo-replication </para> + <itemizedlist> + <listitem> + <para>Start geo-replication between the hosts using the following command: + </para> + <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> start</command> +</para> + <para>For example: +</para> + <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir start +Starting geo-replication session between Volume1 +example.com:/data/remote_dir has been successful</programlisting></para> + <para><note> + <para>You may need to configure the service before starting Gluster Geo-replication. 
For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Configure"/>.</para> + </note></para> + </listitem> + </itemizedlist> + </section> + <section id="chap-Administration_Guide-Geo_Rep-Starting-Verify"> + <title>Verifying Successful Deployment</title> + <para>You can use the gluster command to verify the status of Gluster Geo-replication in your environment.</para> + <para><emphasis role="bold">To verify the status Gluster Geo-replication</emphasis></para> + <itemizedlist> + <listitem> + <para>Verify the status by issuing the following command on host:</para> + <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> status</command> +</para> + <para>For example: +</para> + <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir status</command> +</para> + <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir status + +MASTER SLAVE STATUS +______ ______________________________ ____________ +Volume1 root@example.com:/data/remote_dir Starting....</programlisting> +</para> + </listitem> + </itemizedlist> + </section> + <section id="chap-Administration_Guide-Geo_Rep-Starting-Display"> + <title>Displaying Geo-replication Status Information</title> + <para>You can display status information about a specific geo-replication master session, or a particular master-slave session, or all geo-replication sessions, as needed.</para> + <para><emphasis role="bold">To display geo-replication status information</emphasis></para> + <itemizedlist> + <listitem> + <para>Display information of all geo-replication sessions using the following command:</para> + <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir status + +MASTER SLAVE STATUS +______ ______________________________ ____________ +Volume1 root@example.com:/data/remote_dir Starting....</programlisting></para> + </listitem> + </itemizedlist> + <itemizedlist> + 
<listitem> + <para>Display information of a particular master slave session using the following command: +</para> + <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> status</command> +</para> + <para>For example, to display information of Volume1 and example.com:/data/remote_dir +</para> + <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir status</command> +</para> + <para>The status of the geo-replication between Volume1 and example.com:/data/remote_dir is displayed.</para> + </listitem> + <listitem> + <para>Display information of all geo-replication sessions belonging to a master</para> + <para><command># gluster volume geo-replication MASTER status</command> +</para> + <para>For example, to display information of Volume1</para> + <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir status + +MASTER SLAVE STATUS +______ ______________________________ ____________ +Volume1 ssh://example.com:gluster://127.0.0.1:remove_volume OK + +Volume1 ssh://example.com:file:///data/remote_dir OK</programlisting></para> + <para>The status of a session could be one of the following four:</para> + </listitem> + <listitem> + <para><emphasis role="bold">Starting</emphasis>: This is the initial phase of the Geo-replication session; it remains in this state for a minute, to make sure no abnormalities are present.</para> + </listitem> + <listitem> + <para><emphasis role="bold">OK</emphasis>: The geo-replication session is in a stable state.</para> + </listitem> + <listitem> + <para><emphasis role="bold">Faulty</emphasis>: The geo-replication session has witnessed some abnormality and the situation has to be investigated further. 
For further information, see <xref linkend="chap-Administration_Guide-Troubleshooting"/>.</para> + </listitem> + <listitem> + <para><emphasis role="bold">Corrupt</emphasis>: The monitor thread which is monitoring the geo-replication session has died. This situation should not occur normally; if it persists, contact Red Hat Support (<ulink url="http://www.redhat.com/support/"/>).</para> + </listitem> + </itemizedlist> + </section> + <section id="chap-Administration_Guide-Geo_Rep-Starting-Configure"> + <title>Configuring Geo-replication</title> + <para>To configure Gluster Geo-replication </para> + <itemizedlist> + <listitem> + <para>Use the following command at the Gluster command line: +</para> + <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> config [options]</command> +</para> + <para>For more information about the options, see <xref linkend="chap-Administration_Guide-Com_Ref"/>. +</para> + <para>For example: +</para> + <para>To view the list of all option/value pairs, use the following command: +</para> + <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir config</command> +</para> + </listitem> + </itemizedlist> + </section> + <section id="chap-Administration_Guide-Geo_Rep-Starting-Stop"> + <title>Stopping Geo-replication</title> + <para>You can use the gluster command to stop Gluster Geo-replication (syncing of data from Master to Slave) in your environment. 
</para> + <para><emphasis role="bold">To stop Gluster Geo-replication</emphasis> </para> + <itemizedlist> + <listitem> + <para>Stop geo-replication between the hosts using the following command: +</para> + <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> stop </command></para> + <para>For example: +</para> + <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir stop +Stopping geo-replication session between Volume1 and +example.com:/data/remote_dir has been successful</programlisting></para> + <para>See <xref linkend="chap-Administration_Guide-Com_Ref"/> for more information about the gluster command. +</para> + </listitem> + </itemizedlist> + </section> + </section> + <section id="chap-Administration_Guide-Geo_Rep-Restoring_Data"> + <title>Restoring Data from the Slave</title> + <para>You can restore data from the slave to the master volume, whenever the master volume becomes faulty for reasons like hardware failure. +</para> + <para>The example in this section assumes that you are using the Master Volume (Volume1) with the following configuration: +</para> + <para><programlisting>machine1# gluster volume info +Type: Distribute +Status: Started +Number of Bricks: 2 +Transport-type: tcp +Bricks: +Brick1: machine1:/export/dir16 +Brick2: machine2:/export/dir16 +Options Reconfigured: +geo-replication.indexing: on</programlisting></para> + <para>The data is syncing from master volume (Volume1) to slave directory (example.com:/data/remote_dir). 
To view the status of this geo-replication session run the following command on Master: </para> + <programlisting># gluster volume geo-replication Volume1 root@example.com:/data/remote_dir status + +MASTER SLAVE STATUS +______ ______________________________ ____________ +Volume1 root@example.com:/data/remote_dir OK</programlisting> + <para><emphasis role="bold">Before Failure</emphasis> +</para> + <para>Assume that the Master volume had 100 files and was mounted at /mnt/gluster on one of the client machines (client). Run the following command on the Client machine to count the files: +</para> + <para><programlisting>client# ls /mnt/gluster | wc -l +100</programlisting></para> + <para>The slave directory (example.com) will have the same data as the master volume, which can be verified by running the following command on the slave: +</para> + <para><programlisting>example.com# ls /data/remote_dir/ | wc -l +100</programlisting></para> + <para><emphasis role="bold">After Failure</emphasis> +</para> + <para>If one of the bricks (machine2) fails, then the status of the Geo-replication session changes from "OK" to "Faulty". To view the status of this geo-replication session run the following command on Master: +</para> + <programlisting># gluster volume geo-replication Volume1 root@example.com:/data/remote_dir status + +MASTER SLAVE STATUS +______ ______________________________ ____________ +Volume1 root@example.com:/data/remote_dir Faulty</programlisting> + <para>Machine2 has failed, and you can now see a discrepancy in the number of files between master and slave. A few files will be missing from the master volume, but they will still be available on the slave, as shown below. 
+</para> + <para>Run the following command on Client: + </para> + <para><programlisting>client# ls /mnt/gluster | wc -l +52</programlisting></para> + <para>Run the following command on slave (example.com): +</para> + <para><programlisting>example.com# ls /data/remote_dir/ | wc -l +100</programlisting></para> + <para><emphasis role="bold">To restore data from the slave machine</emphasis></para> + <orderedlist> + <listitem> + <para>Stop all Master's geo-replication sessions using the following command: +</para> + <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> stop</command> +</para> + <para>For example: +</para> + <para><programlisting>machine1# gluster volume geo-replication Volume1 +example.com:/data/remote_dir stop + +Stopping geo-replication session between Volume1 &amp; +example.com:/data/remote_dir has been successful</programlisting></para> + <para><note> + <para>Repeat the <command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> stop </command>command for all active geo-replication sessions of the master volume.</para> + </note></para> + </listitem> + <listitem> + <para>Replace the faulty brick in the master by using the following command: +</para> + <para><command># gluster volume replace-brick <replaceable>VOLNAME BRICK NEW-BRICK</replaceable> start</command> +</para> + <para>For example: +</para> + <para><programlisting>machine1# gluster volume replace-brick Volume1 machine2:/export/dir16 machine3:/export/dir16 start +Replace-brick started successfully</programlisting></para> + </listitem> + <listitem> + <para>Commit the migration of data using the following command: +</para> + <para><command># gluster volume replace-brick <replaceable>VOLNAME BRICK NEW-BRICK</replaceable> commit force </command></para> + <para>For example: +</para> + <para><programlisting>machine1# gluster volume replace-brick Volume1 machine2:/export/dir16 machine3:/export/dir16 commit force +Replace-brick commit 
successful</programlisting></para> + </listitem> + <listitem> + <para>Verify the migration of the brick by viewing the volume info using the following command: +</para> + <para><command># gluster volume info <replaceable>VOLNAME</replaceable></command></para> + <para>For example: +</para> + <para><programlisting>machine1# gluster volume info +Volume Name: Volume1 +Type: Distribute +Status: Started +Number of Bricks: 2 +Transport-type: tcp +Bricks: +Brick1: machine1:/export/dir16 +Brick2: machine3:/export/dir16 +Options Reconfigured: +geo-replication.indexing: on</programlisting></para> + </listitem> + <listitem> + <para>Run the rsync command manually to sync data from the slave to the master volume's client (mount point). +</para> + <para>For example: +</para> + <para><command>example.com# rsync -PavhS --xattrs --ignore-existing /data/remote_dir/ client:/mnt/gluster</command></para> + <para>Verify that the data is synced by using the following command: +</para> + <para>On master volume, run the following command: +</para> + <para><programlisting>client# ls /mnt/gluster | wc -l +100</programlisting></para> + <para>On the Slave run the following command: +</para> + <para><programlisting>example.com# ls /data/remote_dir/ | wc -l +100</programlisting></para> + <para>The Master volume and the Slave directory are now in sync. 
+</para> + </listitem> + <listitem> + <para>Restart geo-replication session from master to slave using the following command: +</para> + <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> start </command></para> + <para>For example: +</para> + <para><programlisting>machine1# gluster volume geo-replication Volume1 +example.com:/data/remote_dir start +Starting geo-replication session between Volume1 &amp; +example.com:/data/remote_dir has been successful</programlisting></para> + </listitem> + </orderedlist> + </section> + <section id="chap-Administration_Guide-Geo_Rep-Best_Practices"> + <title>Best Practices</title> + <para><emphasis role="bold">Manually Setting Time </emphasis></para> + <para>If you have to change the time on your bricks manually, then you must set uniform time on all bricks. This avoids the out-of-time sync issue described in <xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Environment"/>. Setting time backward corrupts the geo-replication index, so the recommended way to set the time manually is: +</para> + <orderedlist> + <listitem> + <para>Stop geo-replication between the master and slave using the following command: +</para> + <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> stop</command> +</para> + </listitem> + <listitem> + <para>Stop the geo-replication indexing using the following command: +</para> + <para><command># gluster volume set <replaceable>MASTER</replaceable> geo-replication.indexing off</command></para> + </listitem> + <listitem> + <para>Set uniform time on + all bricks.</para> + </listitem> + <listitem> + <para>Restart your geo-replication sessions by using the following command: +</para> + <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE </replaceable>start </command></para> + </listitem> + </orderedlist> + <para><emphasis role="bold">Running Geo-replication commands in one system</emphasis> +</para> + 
<para>It is advisable to run the geo-replication commands on one of the bricks in the trusted storage pool. This is because the log files for the geo-replication session are stored on the server where the geo-replication start command is initiated. Hence, it is easier to locate the log files when required. +</para> + <para><emphasis role="bold">Isolation </emphasis></para> + <para>Geo-replication slave operation is not sandboxed as of now and is run as a privileged service. For security reasons, the administrator is advised to create a sandbox environment (dedicated machine / dedicated virtual machine / chroot/container type solution) in which to run the geo-replication slave. Enhancements in this regard will be available in a follow-up minor release. +</para> + </section> +</chapter> diff --git a/doc/legacy/docbook/admin_managing_volumes.xml b/doc/legacy/docbook/admin_managing_volumes.xml new file mode 100644 index 00000000000..70c1fe0b995 --- /dev/null +++ b/doc/legacy/docbook/admin_managing_volumes.xml @@ -0,0 +1,741 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. 
--><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ +<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent"> +%BOOK_ENTITIES; +]> +<chapter id="chap-Administration_Guide-Managing_Volumes"> + <title>Managing GlusterFS Volumes</title> + <para>This section describes how to perform common GlusterFS management operations, including the following: </para> + <itemizedlist> + <listitem> + <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para> + </listitem> + <listitem> + <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Expanding"/></para> + </listitem> + <listitem> + <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Shrinking"/></para> + </listitem> + <listitem> + <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Migrating"/></para> + </listitem> + <listitem> + <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing"/></para> + </listitem> + <listitem> + <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Stop"/></para> + </listitem> + <listitem> + <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Delete"/></para> + </listitem> + <listitem> + <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Self_heal"/></para> + </listitem> + </itemizedlist> + <section id="sect-Administration_Guide-Managing_Volumes-Tuning"> + <title>Tuning Volume Options</title> + <para>You can tune volume options, as needed, while the cluster is online and available. </para> + <para><note> + <para>Red Hat recommends you to set server.allow-insecure option to ON if there are too many bricks in each volume or if there are too many services which have already utilized all the privileged ports in the system. Turning this option ON allows ports to accept/reject messages from insecure ports. So, use this option only if your deployment requires it. 
</para> + </note></para> + <para>To tune volume options </para> + <itemizedlist> + <listitem> + <para>Tune volume options using the following command:</para> + <para><command># gluster volume set <replaceable>VOLNAME OPTION PARAMETER</replaceable></command></para> + <para>For example, to specify the performance cache size for test-volume:</para> + <para><programlisting># gluster volume set test-volume performance.cache-size 256MB +Set volume successful</programlisting></para> + <para>The following table lists the Volume options along with its description and default value: </para> + <para><note> + <para>The default options given here are subject to modification at any given time and may not be the same for all versions.</para> + </note></para> + <informaltable frame="all"> + <tgroup cols="4"> + <colspec colname="c1"/> + <colspec colname="c2"/> + <colspec colname="c3"/> + <colspec colname="c4"/> + <thead> + <row> + <entry>Option</entry> + <entry>Description</entry> + <entry>Default Value</entry> + <entry>Available Options</entry> + </row> + </thead> + <tbody> + <row> + <entry>auth.allow</entry> + <entry>IP addresses of the clients which should be allowed to access the volume. </entry> + <entry>* (allow all)</entry> + <entry>Valid IP address which includes wild card patterns including *, such as 192.168.1.*</entry> + </row> + <row> + <entry>auth.reject</entry> + <entry>IP addresses of the clients which should be denied to access the volume. 
</entry> + <entry>NONE (reject none) </entry> + <entry>Valid IP address which includes wild card patterns including *, such as 192.168.2.*</entry> + </row> + <row> + <entry>client.grace-timeout</entry> + <entry>Specifies the duration for the lock state to be maintained on the client after a network disconnection.</entry> + <entry>10 </entry> + <entry>10 - 1800 secs</entry> + </row> + <row> + <entry>cluster.self-heal-window-size</entry> + <entry>Specifies the maximum number of blocks per file on which self-heal would happen simultaneously. </entry> + <entry>16 </entry> + <entry>0 - 1025 blocks</entry> + </row> + <row> + <entry>cluster.data-self-heal-algorithm</entry> + <entry>Specifies the type of self-heal. If you set the option as "full", the entire file is copied from source to destinations. If the option is set to "diff" the file blocks that are not in sync are copied to destinations. Reset uses a heuristic model. If the file does not exist on one of the subvolumes, or a zero-byte file exists (created by entry self-heal) the entire content has to be copied anyway, so there is no benefit from using the "diff" algorithm. If the file size is about the same as page size, the entire file can be read and written with a few operations, which will be faster than "diff" which has to read checksums and then read and write. </entry> + <entry>reset</entry> + <entry>full | diff | reset</entry> + </row> + <row> + <entry>cluster.min-free-disk</entry> + <entry>Specifies the percentage of disk space that must be kept free. Might be useful for non-uniform bricks. </entry> + <entry>10%</entry> + <entry>Percentage of required minimum free disk space</entry> + </row> + <row> + <entry>cluster.stripe-block-size</entry> + <entry>Specifies the size of the stripe unit that will be read from or written to. 
</entry> + <entry>128 KB (for all files)</entry> + <entry>size in bytes</entry> + </row> + <row> + <entry>cluster.self-heal-daemon</entry> + <entry>Allows you to turn-off proactive self-heal on replicated volumes.</entry> + <entry>on</entry> + <entry>On | Off</entry> + </row> + <row> + <entry>cluster.ensure-durability</entry> + <entry>This option makes sure the data/metadata is durable across abrupt shutdown of the brick. </entry> + <entry>on</entry> + <entry>On | Off</entry> + </row> + <row> + <entry>diagnostics.brick-log-level</entry> + <entry>Changes the log-level of the bricks. </entry> + <entry>INFO </entry> + <entry>DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE</entry> + </row> + <row> + <entry>diagnostics.client-log-level</entry> + <entry>Changes the log-level of the clients. </entry> + <entry>INFO </entry> + <entry>DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE</entry> + </row> + <row> + <entry>diagnostics.latency-measurement</entry> + <entry>Statistics related to the latency of each operation would be tracked. </entry> + <entry>off </entry> + <entry>On | Off</entry> + </row> + <row> + <entry>diagnostics.dump-fd-stats</entry> + <entry>Statistics related to file-operations would be tracked.</entry> + <entry>off </entry> + <entry>On | Off</entry> + </row> + <row> + <entry>feature.read-only</entry> + <entry>Enables you to mount the entire volume as read-only for all the clients (including NFS clients) accessing it.</entry> + <entry>off</entry> + <entry>On | Off</entry> + </row> + <row> + <entry>features.lock-heal</entry> + <entry>Enables self-healing of locks when the network disconnects.</entry> + <entry>on</entry> + <entry>On | Off</entry> + </row> + <row> + <entry>features.quota-timeout</entry> + <entry>For performance reasons, quota caches the directory sizes on client. You can set timeout indicating the maximum duration of directory sizes in cache, from the time they are populated, during which they are considered valid. 
</entry> + <entry>0</entry> + <entry>0 - 3600 secs</entry> + </row> + <row> + <entry>geo-replication.indexing</entry> + <entry>Use this option to automatically sync the changes in the filesystem from Master to Slave.</entry> + <entry>off </entry> + <entry>On | Off</entry> + </row> + <row> + <entry>network.frame-timeout</entry> + <entry>The time frame after which the operation has to be declared as dead, if the server does not respond for a particular operation. </entry> + <entry>1800 (30 mins) </entry> + <entry>1800 secs</entry> + </row> + <row> + <entry>network.ping-timeout</entry> + <entry>The time duration for which the client waits to check if the server is responsive. When a ping timeout happens, there is a network disconnect between the client and server. All resources held by server on behalf of the client get cleaned up. When a reconnection happens, all resources will need to be re-acquired before the client can resume its operations on the server. Additionally, the locks will be acquired and the lock tables updated. <para>This reconnect is a very expensive operation and should be avoided. +</para></entry> + <entry>42 Secs</entry> + <entry>42 Secs</entry> + </row> + <row> + <entry>nfs.enable-ino32</entry> + <entry>For 32-bit nfs clients or applications that do not support 64-bit inode numbers or large files, use this option from the CLI to make Gluster NFS return 32-bit inode numbers instead of 64-bit inode numbers. 
Applications that will benefit are those that were either: <para>* Built 32-bit and run on 32-bit machines.</para><para>* Built 32-bit on 64-bit systems.</para><para>* Built 64-bit but use a library built 32-bit, especially relevant for python and perl scripts.</para><para>Either of the conditions above can lead to application on Linux NFS clients failing with "Invalid argument" or "Value too large for defined data type" errors.</para></entry> + <entry>off</entry> + <entry>On | Off</entry> + </row> + <row> + <entry>nfs.volume-access </entry> + <entry>Set the access type for the specified sub-volume. </entry> + <entry>read-write </entry> + <entry>read-write|read-only </entry> + </row> + <row> + <entry>nfs.trusted-write </entry> + <entry>If there is an UNSTABLE write from the client, STABLE flag will be returned to force the client to not send a COMMIT request. <para>In some environments, combined with a replicated GlusterFS setup, this option can improve write performance. This flag allows users to trust Gluster replication logic to sync data to the disks and recover when required. COMMIT requests if received will be handled in a default manner by fsyncing. STABLE writes are still handled in a sync manner.</para></entry> + <entry> off </entry> + <entry>On | Off </entry> + </row> + <row> + <entry>nfs.trusted-sync</entry> + <entry> All writes and COMMIT requests are treated as async. This implies that no write requests are guaranteed to be on server disks when the write reply is received at the NFS client. Trusted sync includes trusted-write behavior. </entry> + <entry>off </entry> + <entry>On | Off </entry> + </row> + <row> + <entry>nfs.export-dir </entry> + <entry>By default, all sub-volumes of NFS are exported as individual exports. Now, this option allows you to export only the specified subdirectory or subdirectories in the volume. 
This option can also be used in conjunction with the nfs3.export-volumes option to restrict exports only to the subdirectories specified through this option. You must provide an absolute path.</entry> + <entry>Enabled for all sub directories.</entry> + <entry>Enable | Disable </entry> + </row> + <row> + <entry>nfs.export-volumes </entry> + <entry>Enable/Disable exporting entire volumes. If used in conjunction with nfs3.export-dir, this option can allow setting up only subdirectories as exports. </entry> + <entry>on</entry> + <entry> On | Off </entry> + </row> + <row> + <entry>nfs.rpc-auth-unix </entry> + <entry>Enable/Disable the AUTH_UNIX authentication type. This option is enabled by default for better interoperability. However, you can disable it if required.</entry> + <entry>on </entry> + <entry> On | Off </entry> + </row> + <row> + <entry>nfs.rpc-auth-null </entry> + <entry>Enable/Disable the AUTH_NULL authentication type. It is not recommended to change the default value for this option. </entry> + <entry>on </entry> + <entry> On | Off </entry> + </row> + <row> + <entry>nfs.rpc-auth-allow &lt;IP-Addresses&gt; </entry> + <entry>Allow a comma-separated list of addresses and/or hostnames to connect to the server. By default, all clients are disallowed. This allows you to define a general rule for all exported volumes.</entry> + <entry>Reject All </entry> + <entry>IP address or Host name </entry> + </row> + <row> + <entry>nfs.rpc-auth-reject &lt;IP-Addresses&gt; </entry> + <entry>Reject a comma-separated list of addresses and/or hostnames from connecting to the server. By default, all connections are disallowed. This allows you to define a general rule for all exported volumes.</entry> + <entry>Reject All </entry> + <entry>IP address or Host name </entry> + </row> + <row> + <entry>nfs.ports-insecure </entry> + <entry>Allow client connections from unprivileged ports. By default only privileged ports are allowed. 
This is a global setting in case insecure ports are to be enabled for all exports using a single option. </entry> + <entry>off</entry> + <entry> On | Off </entry> + </row> + <row> + <entry>nfs.addr-namelookup </entry> + <entry>Turn off name lookup for incoming client connections using this option. In some setups, the name server can take too long to reply to DNS queries, resulting in timeouts of mount requests. Use this option to turn off name lookups during address authentication. Note that turning this off will prevent you from using hostnames in rpc-auth.addr.* filters. </entry> + <entry>on </entry> + <entry> On | Off </entry> + </row> + <row> + <entry>nfs.register-with-portmap </entry> + <entry>For systems that need to run multiple NFS servers, you need to prevent more than one from registering with the portmap service. Use this option to turn off portmap registration for Gluster NFS. </entry> + <entry>on </entry> + <entry> On | Off </entry> + </row> + <row> + <entry>nfs.port &lt;PORT-NUMBER&gt; </entry> + <entry>Use this option on systems that need Gluster NFS to be associated with a non-default port number. </entry> + <entry>38465-38467 </entry> + <entry/> + </row> + <row> + <entry>nfs.disable</entry> + <entry>Turn off exporting of the volume by NFS.</entry> + <entry> off </entry> + <entry>On | Off </entry> + </row> + <row> + <entry>performance.write-behind-window-size </entry> + <entry>Size of the per-file write-behind buffer.</entry> + <entry>1 MB </entry> + <entry>Write-behind cache size </entry> + </row> + <row> + <entry>performance.io-thread-count </entry> + <entry>The number of threads in the IO threads translator. </entry> + <entry>16</entry> + <entry>0 - 65 </entry> + </row> + <row> + <entry>performance.flush-behind </entry> + <entry>If this option is set to ON, it instructs the write-behind translator to perform flush in the background, by returning success (or any errors, if any of the previous writes failed) to the application even before flush is sent to the backend filesystem. 
</entry> + <entry>On </entry> + <entry>On | Off </entry> + </row> + <row> + <entry>performance.cache-max-file-size </entry> + <entry>Sets the maximum file size cached by the io-cache translator. Can use the normal size descriptors of KB, MB, GB,TB or PB (for example, 6GB). Maximum size uint64. </entry> + <entry>2 ^ 64 -1 bytes </entry> + <entry>size in bytes </entry> + </row> + <row> + <entry>performance.cache-min-file-size </entry> + <entry> Sets the minimum file size cached by the io-cache translator. Values same as "max" above.</entry> + <entry>0B</entry> + <entry>size in bytes </entry> + </row> + <row> + <entry>performance.cache-refresh-timeout </entry> + <entry>The cached data for a file will be retained till 'cache-refresh-timeout' seconds, after which data re-validation is performed. </entry> + <entry>1 sec </entry> + <entry>0 - 61 </entry> + </row> + <row> + <entry>performance.cache-size </entry> + <entry>Size of the read cache.</entry> + <entry> 32 MB </entry> + <entry>size in bytes </entry> + </row> + <row> + <entry>server.allow-insecure </entry> + <entry>Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. </entry> + <entry>on </entry> + <entry>On | Off </entry> + </row> + <row> + <entry>server.grace-timeout</entry> + <entry>Specifies the duration for the lock state to be maintained on the server after a network disconnection.</entry> + <entry>10</entry> + <entry>10 - 1800 secs</entry> + </row> + <row> + <entry>server.statedump-path </entry> + <entry>Location of the state dump file. </entry> + <entry>/tmp directory of the brick </entry> + <entry>New directory path</entry> + </row> + </tbody> + </tgroup> + </informaltable> + <para>You can view the changed volume options using the<command> # gluster volume info <replaceable>VOLNAME</replaceable></command> command. 
For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Delete"/>.</para> + </listitem> + </itemizedlist> + </section> + <section id="sect-Administration_Guide-Managing_Volumes-Expanding"> + <title>Expanding Volumes</title> + <para>You can expand volumes, as needed, while the cluster is online and available. For example, you might want to add a brick to a distributed volume, thereby increasing the distribution and adding to the capacity of the GlusterFS volume. </para> + <para>Similarly, you might want to add a group of bricks to a distributed replicated volume, increasing the capacity of the GlusterFS volume. </para> + <para><note> + <para>When expanding distributed replicated and distributed striped volumes, you need to add a number of bricks that is a multiple of the replica or stripe count. For example, to expand a distributed replicated volume with a replica count of 2, you need to add bricks in multiples of 2 (such as 4, 6, 8, etc.). </para> + </note></para> + <para><emphasis role="bold">To expand a volume</emphasis> </para> + <orderedlist> + <listitem> + <para>On the first server in the cluster, probe the server to which you want to add the new brick using the following command:</para> + <para><command># gluster peer probe <replaceable>HOSTNAME</replaceable></command></para> + <para>For example:</para> + <para><programlisting># gluster peer probe server4 +Probe successful</programlisting></para> + </listitem> + <listitem> + <para>Add the brick using the following command: </para> + <para><command># gluster volume add-brick <replaceable>VOLNAME NEW-BRICK</replaceable></command></para> + <para>For example:</para> + <para><programlisting># gluster volume add-brick test-volume server4:/exp4 +Add Brick successful</programlisting></para> + </listitem> + <listitem> + <para>Check the volume information using the following command: </para> + <para><command># gluster volume info </command></para> + <para>The command displays information 
similar to the following:</para> + <para><programlisting>Volume Name: test-volume +Type: Distribute +Status: Started +Number of Bricks: 4 +Bricks: +Brick1: server1:/exp1 +Brick2: server2:/exp2 +Brick3: server3:/exp3 +Brick4: server4:/exp4</programlisting></para> + </listitem> + <listitem> + <para>Rebalance the volume to ensure that all files are distributed to the new brick.</para> + <para>You can use the rebalance command as described in <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing"/>.</para> + </listitem> + </orderedlist> + </section> + <section id="sect-Administration_Guide-Managing_Volumes-Shrinking"> + <title>Shrinking Volumes</title> + <para>You can shrink volumes, as needed, while the cluster is online and available. For example, you might need to remove a brick that has become inaccessible in a distributed volume due to hardware or network failure. </para> + <para><note> + <para>Data residing on the brick that you are removing will no longer be accessible at the Gluster mount point. Note however that only the configuration information is removed - you can continue to access the data directly from the brick, as necessary. </para> + </note></para> + <para>When shrinking distributed replicated and distributed striped volumes, you need to remove a number of bricks that is a multiple of the replica or stripe count. For example, to shrink a distributed striped volume with a stripe count of 2, you need to remove bricks in multiples of 2 (such as 4, 6, 8, etc.). In addition, the bricks you are trying to remove must be from the same sub-volume (the same replica or stripe set). 
</para> + <para><emphasis role="bold">To shrink a volume</emphasis> </para> + <orderedlist> + <listitem> + <para>Remove the brick using the following command:</para> + <para><command># gluster volume remove-brick <varname>VOLNAME</varname><replaceable> BRICK</replaceable></command> <command>start</command></para> + <para>For example, to remove server2:/exp2:</para> + <para><programlisting># gluster volume remove-brick test-volume server2:/exp2 + +Removing brick(s) can result in data loss. Do you want to Continue? (y/n)</programlisting></para> + </listitem> + <listitem> + <para>Enter "y" to confirm the operation. The command displays the following message indicating that the remove brick operation is successfully started: </para> + <para><programlisting>Remove Brick successful </programlisting></para> + </listitem> + <listitem> + <para>(Optional) View the status of the remove brick operation using the following command:</para> + <para><command># gluster volume remove-brick <varname>VOLNAME</varname><replaceable> BRICK</replaceable></command><command> status</command></para> + <para>For example, to view the status of remove brick operation on server2:/exp2 brick:</para> + <para><screen># gluster volume remove-brick test-volume server2:/exp2 status + Node Rebalanced-files size scanned status + --------- ---------------- ---- ------- ----------- +617c923e-6450-4065-8e33-865e28d9428f 34 340 162 in progress</screen></para> + </listitem> + <listitem> + <para>Check the volume information using the following command: </para> + <para><command># gluster volume info </command></para> + <para>The command displays information similar to the following:</para> + <para><programlisting># gluster volume info +Volume Name: test-volume +Type: Distribute +Status: Started +Number of Bricks: 3 +Bricks: +Brick1: server1:/exp1 +Brick3: server3:/exp3 +Brick4: server4:/exp4</programlisting></para> + </listitem> + <listitem> + <para>Rebalance the volume to ensure that all files are distributed 
across the remaining bricks.</para> + <para>You can use the rebalance command as described in <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing"/>.</para> + </listitem> + </orderedlist> + </section> + <section id="sect-Administration_Guide-Managing_Volumes-Migrating"> + <title>Migrating Volumes</title> + <para>You can migrate the data from one brick to another, as needed, while the cluster is online and available. </para> + <para><emphasis role="bold">To migrate a volume</emphasis> </para> + <orderedlist> + <listitem> + <para>Make sure the new brick, server5 in this example, is successfully added to the cluster.</para> + <para>For more information, see <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para> + </listitem> + <listitem> + <para>Migrate the data from one brick to another using the following command:</para> + <para><command> # gluster volume replace-brick <code>VOLNAME</code><code> BRICK</code><code> NEW-BRICK</code> start</command></para> + <para>For example, to migrate the data in server3:/exp3 to server5:/exp5 in test-volume:</para> + <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 start +Replace brick start operation successful</programlisting></para> + <para><note> + <para>You need to have the FUSE package installed on the server on which you are running the replace-brick command for the command to work.</para> + </note></para> + </listitem> + <listitem> + <para>To pause the migration operation, if needed, use the following command: </para> + <para><command># gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname> pause </command></para> + <para>For example, to pause the data migration from server3:/exp3 to server5:/exp5 in test-volume:</para> + <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 pause +Replace brick pause operation successful</programlisting></para> + </listitem> + <listitem> + <para>To abort the 
migration operation, if needed, use the following command: </para> + <para><command> # gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname>abort </command></para> + <para>For example, to abort the data migration from server3:/exp3 to server5:/exp5 in test-volume:</para> + <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 abort +Replace brick abort operation successful</programlisting></para> + </listitem> + <listitem> + <para>Check the status of the migration operation using the following command: </para> + <para><command> # gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname>status </command></para> + <para>For example, to check the data migration status from server3:/exp3 to server5:/exp5 in test-volume:</para> + <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 status +Current File = /usr/src/linux-headers-2.6.31-14/block/Makefile +Number of files migrated = 10567 +Migration complete</programlisting></para> + <para>The status command shows the current file being migrated along with the current total number of files migrated. 
After completion of migration, it displays Migration complete.</para> + </listitem> + <listitem> + <para>Commit the migration of data from one brick to another using the following command: </para> + <para><command> # gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname>commit </command></para> + <para>For example, to commit the data migration from server3:/exp3 to server5:/exp5 in test-volume:</para> + <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 commit +replace-brick commit successful</programlisting></para> + </listitem> + <listitem> + <para>Verify the migration of brick by viewing the volume info using the following command: </para> + <para><command># gluster volume info <code>VOLNAME</code></command></para> + <para>For example, to check the volume information of new brick server5:/exp5 in test-volume:</para> + <para><programlisting># gluster volume info test-volume +Volume Name: test-volume +Type: Replicate +Status: Started +Number of Bricks: 4 +Transport-type: tcp +Bricks: +Brick1: server1:/exp1 +Brick2: server2:/exp2 +Brick3: server4:/exp4 +Brick4: server5:/exp5</programlisting></para> + <para>The new volume details are displayed.</para> + <para>In the above example, the volume previously consisted of bricks 1, 2, 3, and 4; brick 3 has now been replaced by brick 5.</para> + </listitem> + </orderedlist> + </section> + <section id="sect-Administration_Guide-Managing_Volumes-Rebalancing"> + <title>Rebalancing Volumes</title> + <para>After expanding or shrinking a volume (using the add-brick and remove-brick commands respectively), you need to rebalance the data among the servers. New directories created after expanding or shrinking of the volume will be evenly distributed automatically. For all the existing directories, the distribution can be fixed by rebalancing the layout and/or data. 
</para> + <para>This section describes how to rebalance GlusterFS volumes in your storage environment, using the following common scenarios: </para> + <itemizedlist> + <listitem> + <para>Fix Layout - Fixes the layout changes so that the files can actually go to newly added nodes. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Layout"/>. </para> + </listitem> + <listitem> + <para>Fix Layout and Migrate Data - Rebalances the volume by fixing the layout changes and migrating the existing data. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Migrate"/>.</para> + </listitem> + </itemizedlist> + <section id="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Layout"> + <title>Rebalancing Volume to Fix Layout Changes</title> + <para>Fixing the layout is necessary because the layout structure is static for a given directory. In a scenario where new bricks have been added to the existing volume, newly created files in existing directories will still be distributed only among the old bricks. The <command># gluster volume rebalance<varname> VOLNAME</varname> fix-layout start </command>command will fix the layout information so that the files can also go to newly added nodes. When this command is issued, all the file stat information which is already cached will get revalidated. </para> + <para>A fix-layout rebalance only fixes the layout changes and does not migrate data. If you want to migrate the existing data, use the <command># gluster volume rebalance <varname>VOLNAME</varname> start </command> command to rebalance data among the servers. 
</para> + <para><emphasis role="bold">To rebalance a volume to fix layout changes</emphasis></para> + <itemizedlist> + <listitem> + <para>Start the rebalance operation on any one of the servers using the following command:</para> + <para><command># gluster volume rebalance<varname> VOLNAME</varname> fix-layout start</command></para> + <para>For example:</para> + <para><programlisting># gluster volume rebalance test-volume fix-layout start +Starting rebalance on volume test-volume has been successful</programlisting></para> + </listitem> + </itemizedlist> + </section> + <section id="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Migrate"> + <title>Rebalancing Volume to Fix Layout and Migrate Data</title> + <para>After expanding or shrinking a volume (using the add-brick and remove-brick commands respectively), you need to rebalance the data among the servers. </para> + <para><emphasis role="bold">To rebalance a volume to fix layout and migrate the existing data</emphasis></para> + <itemizedlist> + <listitem> + <para>Start the rebalance operation on any one of the servers using the following command:</para> + <para><command># gluster volume rebalance<varname> VOLNAME</varname> start</command></para> + <para>For example:</para> + <para><programlisting># gluster volume rebalance test-volume start +Starting rebalancing on volume test-volume has been successful</programlisting></para> + </listitem> + <listitem> + <para>Start the migration operation forcefully on any one of the servers using the following command:</para> + <para><command># gluster volume rebalance<varname> VOLNAME</varname> start force</command></para> + <para>For example:</para> + <para><programlisting># gluster volume rebalance test-volume start force +Starting rebalancing on volume test-volume has been successful</programlisting></para> + </listitem> + </itemizedlist> + </section> + <section> + <title>Displaying Status of Rebalance Operation</title> + <para>You can display the status 
information about rebalance volume operation, as needed. </para> + <para><emphasis role="bold">To view status of rebalance volume</emphasis></para> + <itemizedlist> + <listitem> + <para>Check the status of the rebalance operation, using the following command:</para> + <para><command># gluster volume rebalance <replaceable>VOLNAME</replaceable> status</command></para> + <para>For example:</para> + <para><screen># gluster volume rebalance test-volume status + Node Rebalanced-files size scanned status + --------- ---------------- ---- ------- ----------- +617c923e-6450-4065-8e33-865e28d9428f 416 1463 312 in progress</screen></para> + <para>The time to complete the rebalance operation depends on the number of files on the volume along with the corresponding file sizes. Continue checking the rebalance status, verifying that the number of files rebalanced or total files scanned keeps increasing.</para> + <para>For example, running the status command again might display a result similar to the following:</para> + <para><screen># gluster volume rebalance test-volume status + Node Rebalanced-files size scanned status + --------- ---------------- ---- ------- ----------- +617c923e-6450-4065-8e33-865e28d9428f 498 1783 378 in progress</screen></para> + <para>The rebalance status displays the following when the rebalance is complete:</para> + <para><screen># gluster volume rebalance test-volume status + Node Rebalanced-files size scanned status + --------- ---------------- ---- ------- ----------- +617c923e-6450-4065-8e33-865e28d9428f 502 1873 334 completed</screen></para> + </listitem> + </itemizedlist> + </section> + <section> + <title>Stopping Rebalance Operation</title> + <para>You can stop the rebalance operation, as needed.</para> + <para><emphasis role="bold">To stop rebalance</emphasis></para> + <itemizedlist> + <listitem> + <para>Stop the rebalance operation using the following command:</para> + <para><command># gluster volume rebalance 
<replaceable>VOLNAME</replaceable> stop</command></para> + <para>For example:</para> + <para><screen># gluster volume rebalance test-volume stop + Node Rebalanced-files size scanned status + --------- ---------------- ---- ------- ----------- +617c923e-6450-4065-8e33-865e28d9428f 59 590 244 stopped +Stopped rebalance process on volume test-volume </screen></para> + </listitem> + </itemizedlist> + </section> + </section> + <section id="sect-Administration_Guide-Managing_Volumes-Stop"> + <title>Stopping Volumes</title> + <para>To stop a volume</para> + <orderedlist> + <listitem> + <para>Stop the volume using the following command: + +</para> + <para><command># gluster volume stop <varname>VOLNAME </varname></command></para> + <para>For example, to stop test-volume:</para> + <para><programlisting># gluster volume stop test-volume +Stopping volume will make its data inaccessible. Do you want to continue? (y/n) +</programlisting></para> + </listitem> + <listitem> + <para>Enter <userinput>y</userinput> to confirm the operation. The output of the command displays the following: + +</para> + <programlisting>Stopping volume test-volume has been successful</programlisting> + </listitem> + </orderedlist> + </section> + <section id="sect-Administration_Guide-Managing_Volumes-Delete"> + <title>Deleting Volumes</title> + <para>To delete a volume </para> + <orderedlist> + <listitem> + <para>Delete the volume using the following command:</para> + <para><command># gluster volume delete <varname>VOLNAME</varname></command></para> + <para>For example, to delete test-volume:</para> + <para><programlisting># gluster volume delete test-volume +Deleting volume will erase all information about the volume. Do you want to continue? (y/n)</programlisting></para> + </listitem> + <listitem> + <para>Enter <userinput role="bold">y</userinput> to confirm the operation. 
The command displays the following:</para> + <para><programlisting>Deleting volume test-volume has been successful</programlisting></para> + </listitem> + </orderedlist> + </section> + <section id="sect-Administration_Guide-Managing_Volumes-Self_heal"> + <title>Triggering Self-Heal on Replicate</title> + <para>In the replicate module, you previously had to manually trigger a self-heal when a brick went offline and came back online, to bring all the replicas in sync. Now the pro-active self-heal daemon runs in the background, diagnoses issues, and automatically initiates self-healing every 10 minutes on the files which require<emphasis role="italic"> healing</emphasis>. </para> + <para>You can view the list of files that need <emphasis role="italic">healing</emphasis>, the list of files which are currently/previously <emphasis role="italic">healed</emphasis>, the list of files which are in split-brain state, and you can manually trigger self-heal on the entire volume or only on the files which need <emphasis role="italic">healing</emphasis>.</para> + <itemizedlist> + <listitem> + <para>Trigger self-heal only on the files which require <emphasis role="italic">healing</emphasis>:</para> + <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command></para> + <para>For example, to trigger self-heal on files which require <emphasis role="italic">healing</emphasis> of test-volume:</para> + <para><screen># gluster volume heal test-volume +Heal operation on volume test-volume has been successful</screen></para> + </listitem> + <listitem> + <para>Trigger self-heal on all the files of a volume:</para> + <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>full</command></para> + <para>For example, to trigger self-heal on all the files of test-volume:</para> + <para><screen># gluster volume heal test-volume full +Heal operation on volume test-volume has been successful</screen></para> + </listitem> + <listitem> + 
<para>View the list of files that need <emphasis role="italic">healing</emphasis>:</para> + <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info</command></para> + <para>For example, to view the list of files on test-volume that need <emphasis role="italic">healing</emphasis>:</para> + <para><screen># gluster volume heal test-volume info +Brick <emphasis role="italic">server1</emphasis>:/gfs/test-volume_0 +Number of entries: 0 + +Brick <emphasis role="italic">server2</emphasis>:/gfs/test-volume_1 +Number of entries: 101 +/95.txt +/32.txt +/66.txt +/35.txt +/18.txt +/26.txt +/47.txt +/55.txt +/85.txt +...</screen></para> + </listitem> + <listitem> + <para>View the list of files that are self-healed:</para> + <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info healed</command> </para> + <para>For example, to view the list of files on test-volume that are self-healed:</para> + <para><screen># gluster volume heal test-volume info healed +Brick <emphasis role="italic">server1</emphasis>:/gfs/test-volume_0 +Number of entries: 0 + +Brick <emphasis role="italic">server2</emphasis>:/gfs/test-volume_1 +Number of entries: 69 +/99.txt +/93.txt +/76.txt +/11.txt +/27.txt +/64.txt +/80.txt +/19.txt +/41.txt +/29.txt +/37.txt +/46.txt +...</screen></para> + </listitem> + <listitem> + <para>View the list of files of a particular volume on which the self-heal failed:</para> + <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info failed</command> </para> + <para>For example, to view the list of files of test-volume that are not self-healed:</para> + <para><screen># gluster volume heal test-volume info failed +Brick <emphasis role="italic">server1</emphasis>:/gfs/test-volume_0 +Number of entries: 0 + +Brick server2:/gfs/test-volume_3 +Number of entries: 72 +/90.txt +/95.txt +/77.txt +/71.txt +/87.txt +/24.txt +...</screen></para> + </listitem> + 
<listitem> + <para>View the list of files of a particular volume which are in split-brain state:</para> + <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info split-brain</command> </para> + <para>For example, to view the list of files of test-volume which are in split-brain state:</para> + <para><screen># gluster volume heal test-volume info split-brain +Brick server1:/gfs/test-volume_2 +Number of entries: 12 +/83.txt +/28.txt +/69.txt +... + +Brick <emphasis role="italic">server2</emphasis>:/gfs/test-volume_2 +Number of entries: 12 +/83.txt +/28.txt +/69.txt +...</screen></para> + </listitem> + </itemizedlist> + </section> +</chapter> diff --git a/doc/legacy/docbook/admin_monitoring_workload.xml b/doc/legacy/docbook/admin_monitoring_workload.xml new file mode 100644 index 00000000000..e85bc51d896 --- /dev/null +++ b/doc/legacy/docbook/admin_monitoring_workload.xml @@ -0,0 +1,878 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []> +<chapter id="chap-Administration_Guide-Monitor_Workload"> + <title>Monitoring your GlusterFS Workload</title> + <para>You can monitor the GlusterFS volumes on different parameters. Monitoring volumes helps in capacity planning and performance tuning tasks of the GlusterFS volume. Using this information, you can identify and troubleshoot issues. </para> + <para>You can use Volume Top and Profile commands to view the performance and identify bottlenecks/hotspots of each brick of a volume. This helps system administrators to get vital performance information whenever performance needs to be probed. </para> + <para>You can also perform a statedump of the brick processes and the NFS server process of a volume, and also view volume status and volume information. 
</para> + <section id="chap-Administration_Guide-Monitor_Workload-Profile"> + <title>Running GlusterFS Volume Profile Command</title> + <para>GlusterFS Volume Profile command provides an interface to get the per-brick I/O information for each File Operation (FOP) of a volume. The per brick information helps in identifying bottlenecks in the storage system. +</para> + <para>This section describes how to run GlusterFS Volume Profile command by performing the following operations: +</para> + <itemizedlist> + <listitem> + <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Profile-Start"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Profile-Display"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Profile-Stop"/></para> + </listitem> + </itemizedlist> + <section id="chap-Administration_Guide-Monitor_Workload-Profile-Start"> + <title>Start Profiling</title> + <para>You must start the Profiling to view the File Operation information for each brick. 
+</para> + <para><emphasis role="bold">To start profiling: </emphasis></para> + <itemizedlist> + <listitem> + <para>Start profiling using the following command: + </para> + </listitem> + </itemizedlist> + <para><command># gluster volume profile <replaceable>VOLNAME</replaceable> start </command></para> + <para>For example, to start profiling on test-volume: +</para> + <para><programlisting># gluster volume profile test-volume start +Profiling started on test-volume</programlisting></para> + <para>When profiling on the volume is started, the following additional options are displayed in the Volume Info: +</para> + <para><programlisting>diagnostics.count-fop-hits: on + +diagnostics.latency-measurement: on</programlisting></para> + </section> + <section id="chap-Administration_Guide-Monitor_Workload-Profile-Display"> + <title>Displaying the I/O Information</title> + <para>You can view the I/O information of each brick. +</para> + <para>To display I/O information: +</para> + <itemizedlist> + <listitem> + <para>Display the I/O information using the following command: +</para> + </listitem> + </itemizedlist> + <para><command># gluster volume profile <replaceable>VOLNAME</replaceable> info</command> + +</para> + <para>For example, to see the I/O information on test-volume: + +</para> + <para><programlisting># gluster volume profile test-volume info +Brick: Test:/export/2 +Cumulative Stats: + +Block 1b+ 32b+ 64b+ +Size: + Read: 0 0 0 + Write: 908 28 8 + +Block 128b+ 256b+ 512b+ +Size: + Read: 0 6 4 + Write: 5 23 16 + +Block 1024b+ 2048b+ 4096b+ +Size: + Read: 0 52 17 + Write: 15 120 846 + +Block 8192b+ 16384b+ 32768b+ +Size: + Read: 52 8 34 + Write: 234 134 286 + +Block 65536b+ 131072b+ +Size: + Read: 118 622 + Write: 1341 594 + + +%-latency Avg- Min- Max- calls Fop + latency Latency Latency +___________________________________________________________ +4.82 1132.28 21.00 800970.00 4575 WRITE +5.70 156.47 9.00 665085.00 39163 READDIRP +11.35 315.02 9.00 1433947.00 38698 
LOOKUP +11.88 1729.34 21.00 2569638.00 7382 FXATTROP +47.35 104235.02 2485.00 7789367.00 488 FSYNC + +------------------ + +------------------ + +Duration : 335 + +BytesRead : 94505058 + +BytesWritten : 195571980</programlisting></para> + </section> + <section id="chap-Administration_Guide-Monitor_Workload-Profile-Stop"> + <title>Stop Profiling</title> + <para>You can stop profiling the volume, if you do not need profiling information anymore. +</para> + <para><emphasis role="bold">To stop profiling</emphasis> +</para> + <itemizedlist> + <listitem> + <para>Stop profiling using the following command: +</para> + <para><command># gluster volume profile <replaceable>VOLNAME</replaceable> stop</command> +</para> + <para>For example, to stop profiling on test-volume:</para> + <para><command># gluster volume profile <replaceable>test-volume</replaceable> stop</command> </para> + <para><computeroutput>Profiling stopped on test-volume</computeroutput></para> + </listitem> + </itemizedlist> + </section> + </section> + <section id="chap-Administration_Guide-Monitor_Workload-Top"> + <title> Running GlusterFS Volume TOP Command </title> + <para>GlusterFS Volume Top command allows you to view the GlusterFS bricks’ performance metrics like +read, write, file open calls, file read calls, file write calls, directory open calls, and directory read +calls. The top command displays up to 100 results. 
+</para> + <para>This section describes how to run and view the results for the following GlusterFS Top commands: +</para> + <itemizedlist> + <listitem> + <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Open_FD_Count"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-File_Read"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-File_Write"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Open_Calls"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Read_Calls"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Read_Perf"/></para> + </listitem> + <listitem> + <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Write_Perf"/></para> + </listitem> + </itemizedlist> + <section id="chap-Administration_Guide-Monitor_Workload-Top-Open_FD_Count"> + <title>Viewing Open fd Count and Maximum fd Count </title> + <para>You can view both the current open fd count (list of files that are currently the most opened and the +count) on the brick and the maximum open fd count (count of files that are currently open and +the count of the maximum number of files opened at any given point of time, since the servers are up +and running). If the brick name is not specified, then open fd metrics of all the bricks belonging to +the volume will be displayed. 
+</para> + <para><emphasis role="bold">To view open fd count and maximum fd count: </emphasis></para> + <itemizedlist> + <listitem> + <para>View open fd count and maximum fd count using the following command:</para> + <para><command># gluster volume top <replaceable>VOLNAME</replaceable> open [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>]</command> +</para> + <para>For example, to view open fd count and maximum fd count on brick <replaceable>server:/export</replaceable> of <replaceable>test-volume</replaceable> and list top 10 open calls: +</para> + <para><command># gluster volume top <replaceable>test-volume</replaceable> open brick <replaceable>server:/export/</replaceable> list-cnt <replaceable>10</replaceable></command></para> + <para><computeroutput>Brick: server:/export/dir1 </computeroutput></para> + <para><computeroutput>Current open fd's: 34 Max open fd's: 209 </computeroutput><programlisting> ==========Open file stats======== + +open file name +call count + +2 /clients/client0/~dmtmp/PARADOX/ + COURSES.DB + +11 /clients/client0/~dmtmp/PARADOX/ + ENROLL.DB + +11 /clients/client0/~dmtmp/PARADOX/ + STUDENTS.DB + +10 /clients/client0/~dmtmp/PWRPNT/ + TIPS.PPT + +10 /clients/client0/~dmtmp/PWRPNT/ + PCBENCHM.PPT + +9 /clients/client7/~dmtmp/PARADOX/ + STUDENTS.DB + +9 /clients/client1/~dmtmp/PARADOX/ + STUDENTS.DB + +9 /clients/client2/~dmtmp/PARADOX/ + STUDENTS.DB + +9 /clients/client0/~dmtmp/PARADOX/ + STUDENTS.DB + +9 /clients/client8/~dmtmp/PARADOX/ + STUDENTS.DB</programlisting></para> + </listitem> + </itemizedlist> + </section> + <section id="chap-Administration_Guide-Monitor_Workload-Top-File_Read"> + <title>Viewing Highest File Read Calls </title> + <para>You can view highest read calls on each brick. If brick name is not specified, then by default, list of +100 files will be displayed. 
+</para> + <para><emphasis role="bold">To view highest file Read calls:</emphasis> +</para> + <itemizedlist> + <listitem> + <para>View highest file Read calls using the following command: +</para> + <para><command># gluster volume top <replaceable>VOLNAME</replaceable> read [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para> + <para>For example, to view highest Read calls on brick server:/export of test-volume: +</para> + <para><command># gluster volume top <replaceable>test-volume</replaceable> read brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command></para> + <para><computeroutput>Brick:</computeroutput> <replaceable>server:/export/dir1</replaceable><programlisting> ==========Read file stats======== + +read filename +call count + +116 /clients/client0/~dmtmp/SEED/LARGE.FIL + +64 /clients/client0/~dmtmp/SEED/MEDIUM.FIL + +54 /clients/client2/~dmtmp/SEED/LARGE.FIL + +54 /clients/client6/~dmtmp/SEED/LARGE.FIL + +54 /clients/client5/~dmtmp/SEED/LARGE.FIL + +54 /clients/client0/~dmtmp/SEED/LARGE.FIL + +54 /clients/client3/~dmtmp/SEED/LARGE.FIL + +54 /clients/client4/~dmtmp/SEED/LARGE.FIL + +54 /clients/client9/~dmtmp/SEED/LARGE.FIL + +54 /clients/client8/~dmtmp/SEED/LARGE.FIL</programlisting> </para> + </listitem> + </itemizedlist> + </section> + <section id="chap-Administration_Guide-Monitor_Workload-Top-File_Write"> + <title>Viewing Highest File Write Calls </title> + <para>You can view list of files which has highest file write calls on each brick. If brick name is not +specified, then by default, list of 100 files will be displayed. 
+</para> + <para><emphasis role="bold">To view highest file Write calls:</emphasis> +</para> + <itemizedlist> + <listitem> + <para>View highest file Write calls using the following command: +</para> + <para><command># gluster volume top <replaceable>VOLNAME</replaceable> write [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para> + <para>For example, to view highest Write calls on brick <replaceable>server:/export</replaceable> of <replaceable>test-volume</replaceable>: +</para> + <para><command># gluster volume top <replaceable>test-volume</replaceable> write brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command></para> + <para><code>Brick: server:/export/dir1 </code><programlisting> ==========Write file stats======== +write call count filename + +83 /clients/client0/~dmtmp/SEED/LARGE.FIL + +59 /clients/client7/~dmtmp/SEED/LARGE.FIL + +59 /clients/client1/~dmtmp/SEED/LARGE.FIL + +59 /clients/client2/~dmtmp/SEED/LARGE.FIL + +59 /clients/client0/~dmtmp/SEED/LARGE.FIL + +59 /clients/client8/~dmtmp/SEED/LARGE.FIL + +59 /clients/client5/~dmtmp/SEED/LARGE.FIL + +59 /clients/client4/~dmtmp/SEED/LARGE.FIL + +59 /clients/client6/~dmtmp/SEED/LARGE.FIL + +59 /clients/client3/~dmtmp/SEED/LARGE.FIL</programlisting></para> + </listitem> + </itemizedlist> + </section> + <section id="chap-Administration_Guide-Monitor_Workload-Top-Open_Calls"> + <title>Viewing Highest Open Calls on Directories </title> + <para>You can view list of files which has highest open calls on directories of each brick. If brick name is +not specified, then the metrics of all the bricks belonging to that volume will be displayed. 
+</para> + <para>To view list of open calls on each directory</para> + <itemizedlist> + <listitem> + <para>View list of open calls on each directory using the following command: +</para> + <para><command># gluster volume top <replaceable>VOLNAME</replaceable> opendir [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para> + <para>For example, to view open calls on brick server:/export/ of test-volume: +</para> + <para><command># gluster volume top <replaceable>test-volume</replaceable> opendir brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command></para> + <para><code>Brick: server:/export/dir1 </code><programlisting> ==========Directory open stats======== + +Opendir count directory name + +1001 /clients/client0/~dmtmp + +454 /clients/client8/~dmtmp + +454 /clients/client2/~dmtmp + +454 /clients/client6/~dmtmp + +454 /clients/client5/~dmtmp + +454 /clients/client9/~dmtmp + +443 /clients/client0/~dmtmp/PARADOX + +408 /clients/client1/~dmtmp + +408 /clients/client7/~dmtmp + +402 /clients/client4/~dmtmp</programlisting></para> + </listitem> + </itemizedlist> + </section> + <section id="chap-Administration_Guide-Monitor_Workload-Top-Read_Calls"> + <title>Viewing Highest Read Calls on Directory </title> + <para>You can view list of files which has highest directory read calls on each brick. If brick name is not +specified, then the metrics of all the bricks belonging to that volume will be displayed. 
+</para> + <para><emphasis role="bold">To view list of highest directory read calls on each brick</emphasis> +</para> + <itemizedlist> + <listitem> + <para>View list of highest directory read calls on each brick using the following command: +</para> + <para><command># gluster volume top <replaceable>VOLNAME</replaceable> readdir [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para> + <para>For example, to view highest directory read calls on brick <replaceable>server:/export</replaceable> of <replaceable>test-volume</replaceable>:</para> + <para><command># gluster volume top <replaceable>test-volume</replaceable> readdir brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command> </para> + <para><code>Brick: <replaceable>server:/export/dir1</replaceable></code><programlisting>==========Directory readdirp stats======== + +readdirp count directory name + +1996 /clients/client0/~dmtmp + +1083 /clients/client0/~dmtmp/PARADOX + +904 /clients/client8/~dmtmp + +904 /clients/client2/~dmtmp + +904 /clients/client6/~dmtmp + +904 /clients/client5/~dmtmp + +904 /clients/client9/~dmtmp + +812 /clients/client1/~dmtmp + +812 /clients/client7/~dmtmp + +800 /clients/client4/~dmtmp</programlisting> +</para> + </listitem> + </itemizedlist> + </section> + <section id="chap-Administration_Guide-Monitor_Workload-Top-Read_Perf"> + <title>Viewing List of Read Performance on each Brick </title> + <para>You can view the read throughput of files on each brick. If brick name is not specified, then the +metrics of all the bricks belonging to that volume will be displayed. The output will be the read +throughput. 
+</para> + <para><programlisting> ==========Read throughput file stats======== + +read filename Time +through +put(MBp +s) + +2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31 + TRIDOTS.POT 15:38:36.894610 +2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31 + PCBENCHM.PPT 15:38:39.815310 +2383.00 /clients/client2/~dmtmp/SEED/ -2011-01-31 + MEDIUM.FIL 15:52:53.631499 + +2340.00 /clients/client0/~dmtmp/SEED/ -2011-01-31 + MEDIUM.FIL 15:38:36.926198 + +2299.00 /clients/client0/~dmtmp/SEED/ -2011-01-31 + LARGE.FIL 15:38:36.930445 + +2259.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31 + COURSES.X04 15:38:40.549919 + +2221.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31 + STUDENTS.VAL 15:52:53.298766 + +2221.00 /clients/client3/~dmtmp/SEED/ -2011-01-31 + COURSES.DB 15:39:11.776780 + +2184.00 /clients/client3/~dmtmp/SEED/ -2011-01-31 + MEDIUM.FIL 15:39:10.251764 + +2184.00 /clients/client5/~dmtmp/WORD/ -2011-01-31 + BASEMACH.DOC 15:39:09.336572 </programlisting>This command will initiate a dd for the specified count and block size and measures the +corresponding throughput. 
+</para> + <para><emphasis role="bold">To view list of read performance on each brick</emphasis> +</para> + <itemizedlist> + <listitem> + <para>View list of read performance on each brick using the following command: +</para> + <para><command># gluster volume top <replaceable>VOLNAME</replaceable> read-perf [bs <replaceable>blk-size</replaceable> count <replaceable>count</replaceable>] [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>]</command> +</para> + <para>For example, to view read performance on brick server:/export/ of test-volume, 256 block size +of count 1, and list count 10: +</para> + <para><command># gluster volume top <replaceable>test-volume</replaceable> read-perf bs 256 count 1 brick <replaceable>server:/export/ </replaceable>list-cnt <replaceable>10</replaceable></command></para> + <para><computeroutput>Brick: server:/export/dir1 256 bytes (256 B) copied, Throughput: 4.1 MB/s </computeroutput></para> + <programlisting> ==========Read throughput file stats======== + +read filename Time +through +put(MBp +s) + +2912.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31 + TRIDOTS.POT 15:38:36.896486 + +2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31 + PCBENCHM.PPT 15:38:39.815310 + +2383.00 /clients/client2/~dmtmp/SEED/ -2011-01-31 + MEDIUM.FIL 15:52:53.631499 + +2340.00 /clients/client0/~dmtmp/SEED/ -2011-01-31 + MEDIUM.FIL 15:38:36.926198 + +2299.00 /clients/client0/~dmtmp/SEED/ -2011-01-31 + LARGE.FIL 15:38:36.930445 + +2259.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31 + COURSES.X04 15:38:40.549919 + +2221.00 /clients/client9/~dmtmp/PARADOX/ -2011-01-31 + STUDENTS.VAL 15:52:53.298766 + +2221.00 /clients/client8/~dmtmp/PARADOX/ -2011-01-31 + COURSES.DB 15:39:11.776780 + +2184.00 /clients/client3/~dmtmp/SEED/ -2011-01-31 + MEDIUM.FIL 15:39:10.251764 + +2184.00 /clients/client5/~dmtmp/WORD/ -2011-01-31 + BASEMACH.DOC 15:39:09.336572 + </programlisting> + </listitem> + </itemizedlist> + </section> + <section 
id="chap-Administration_Guide-Monitor_Workload-Top-Write_Perf"> + <title>Viewing List of Write Performance on each Brick </title> + <para>You can view list of write throughput of files on each brick. If brick name is not specified, then the +metrics of all the bricks belonging to that volume will be displayed. The output will be the write +throughput. +</para> + <para>This command will initiate a dd for the specified count and block size and measures the +corresponding throughput. +To view list of write performance on each brick: +</para> + <itemizedlist> + <listitem> + <para>View list of write performance on each brick using the following command: +</para> + <para><command># gluster volume top <replaceable>VOLNAME</replaceable> write-perf [bs <replaceable>blk-size</replaceable> count <replaceable>count</replaceable>] [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para> + <para>For example, to view write performance on brick <replaceable>server:/export/</replaceable> of <replaceable>test-volume</replaceable>, 256 block size +of count 1, and list count 10: +</para> + <para><command># gluster volume top <replaceable>test-volume</replaceable> write-perf bs 256 count 1 brick <replaceable>server:/export/ </replaceable>list-cnt <replaceable>10</replaceable></command></para> + <para><code>Brick</code>: <replaceable>server:/export/dir1</replaceable> +</para> + <para><code>256 bytes (256 B) copied, Throughput: 2.8 MB/s </code><programlisting> ==========Write throughput file stats======== + +write filename Time +throughput +(MBps) + +1170.00 /clients/client0/~dmtmp/SEED/ -2011-01-31 + SMALL.FIL 15:39:09.171494 + +1008.00 /clients/client6/~dmtmp/SEED/ -2011-01-31 + LARGE.FIL 15:39:09.73189 + +949.00 /clients/client0/~dmtmp/SEED/ -2011-01-31 + MEDIUM.FIL 15:38:36.927426 + +936.00 /clients/client0/~dmtmp/SEED/ -2011-01-31 + LARGE.FIL 15:38:36.933177 +897.00 /clients/client5/~dmtmp/SEED/ -2011-01-31 + MEDIUM.FIL 15:39:09.33628 
+ +897.00 /clients/client6/~dmtmp/SEED/ -2011-01-31 + MEDIUM.FIL 15:39:09.27713 + +885.00 /clients/client0/~dmtmp/SEED/ -2011-01-31 + SMALL.FIL 15:38:36.924271 + +528.00 /clients/client5/~dmtmp/SEED/ -2011-01-31 + LARGE.FIL 15:39:09.81893 + +516.00 /clients/client6/~dmtmp/ACCESS/ -2011-01-31 + FASTENER.MDB 15:39:01.797317 +</programlisting></para> + </listitem> + </itemizedlist> + </section> + </section> + <section id="sect-Administration_Guide-Monitor_Workload-Displaying_Volume_Information"> + <title>Displaying Volume Information </title> + <para>You can display information about a specific volume, or all volumes, as needed.</para> + <para><emphasis role="bold">To display volume information </emphasis></para> + <itemizedlist> + <listitem> + <para>Display information about a specific volume using the following command:</para> + <para><command># gluster volume info </command><varname>VOLNAME</varname></para> + <para>For example, to display information about test-volume:</para> + <para><programlisting># gluster volume info test-volume +Volume Name: test-volume +Type: Distribute +Status: Created +Number of Bricks: 4 +Bricks: +Brick1: server1:/exp1 +Brick2: server2:/exp2 +Brick3: server3:/exp3 +Brick4: server4:/exp4</programlisting></para> + </listitem> + <listitem> + <para>Display information about all volumes using the following command:</para> + <para><command># gluster volume info all</command></para> + <para><programlisting># gluster volume info all + +Volume Name: test-volume +Type: Distribute +Status: Created +Number of Bricks: 4 +Bricks: +Brick1: server1:/exp1 +Brick2: server2:/exp2 +Brick3: server3:/exp3 +Brick4: server4:/exp4 + +Volume Name: mirror +Type: Distributed-Replicate +Status: Started +Number of Bricks: 2 X 2 = 4 +Bricks: +Brick1: server1:/brick1 +Brick2: server2:/brick2 +Brick3: server3:/brick3 +Brick4: server4:/brick4 + +Volume Name: Vol +Type: Distribute +Status: Started +Number of Bricks: 1 +Bricks: +Brick: server:/brick6 + 
+</programlisting></para> + </listitem> + </itemizedlist> + </section> + <section id="sect-Administration_Guide-Monitor_Workload-Performing_Statedump"> + <title>Performing Statedump on a Volume </title> + <para>Statedump is a mechanism through which you can get details of all internal variables and state of the glusterfs process at the time of issuing the command. You can perform statedumps of the brick processes and nfs server process of a volume using the statedump command. The following options can be used to determine what information is to be dumped:</para> + <itemizedlist> + <listitem> + <para><emphasis role="bold">mem</emphasis> - Dumps the memory usage and memory pool details of the bricks.</para> + </listitem> + <listitem> + <para><emphasis role="bold">iobuf</emphasis> - Dumps iobuf details of the bricks.</para> + </listitem> + <listitem> + <para><emphasis role="bold">priv</emphasis> - Dumps private information of loaded translators.</para> + </listitem> + <listitem> + <para><emphasis role="bold">callpool</emphasis> - Dumps the pending calls of the volume.</para> + </listitem> + <listitem> + <para><emphasis role="bold">fd</emphasis> - Dumps the open fd tables of the volume.</para> + </listitem> + <listitem> + <para><emphasis role="bold">inode</emphasis> - Dumps the inode tables of the volume.</para> + </listitem> + </itemizedlist> + <para><emphasis role="bold">To display volume statedump </emphasis></para> + <itemizedlist> + <listitem> + <para>Display statedump of a volume or NFS server using the following command:</para> + <para> <command># gluster volume statedump <replaceable>VOLNAME</replaceable> [nfs] [all|mem|iobuf|callpool|priv|fd|inode]</command></para> + <para>For example, to display statedump of test-volume:</para> + <para><programlisting># gluster volume statedump test-volume +Volume statedump successful</programlisting></para> + <para>The statedump files are created on the brick servers in the <filename>/tmp</filename> directory or in the 
directory set using <command>server.statedump-path</command> volume option. The naming convention of the dump file is <filename>&lt;brick-path&gt;.&lt;brick-pid&gt;.dump</filename>.</para> + </listitem> + <listitem> + <para>By default, the output of the statedump is stored at <filename> /tmp/&lt;brickname.PID.dump&gt;</filename> file on that particular server. Change the directory of the statedump file using the following command:</para> + <para><command># gluster volume set <replaceable>VOLNAME</replaceable> server.statedump-path <replaceable>path</replaceable></command></para> + <para>For example, to change the location of the statedump file of test-volume:</para> + <para><programlisting># gluster volume set test-volume server.statedump-path /usr/local/var/log/glusterfs/dumps/ +Set volume successful</programlisting></para> + <para>You can view the changed path of the statedump file using the following command:</para> + <para><command># gluster volume info <replaceable>VOLNAME</replaceable></command></para> + </listitem> + </itemizedlist> + </section> + <section id="sect-Administration_Guide-Monitor_Workload-Displaying_Volume_Status"> + <title>Displaying Volume Status </title> + <para>You can display the status information about a specific volume, brick or all volumes, as needed. Status information can be used to understand the current status of the brick, nfs processes, and overall file system. Status information can also be used to monitor and debug the volume information. 
You can view status of the volume along with the following details:</para> + <itemizedlist> + <listitem> + <para><emphasis role="bold">detail</emphasis> - Displays additional information about the bricks.</para> + </listitem> + <listitem> + <para><emphasis role="bold">clients</emphasis> - Displays the list of clients connected to the volume.</para> + </listitem> + <listitem> + <para><emphasis role="bold">mem</emphasis> - Displays the memory usage and memory pool details of the bricks.</para> + </listitem> + <listitem> + <para><emphasis role="bold">inode</emphasis> - Displays the inode tables of the volume.</para> + </listitem> + <listitem> + <para><emphasis role="bold">fd</emphasis> - Displays the open fd (file descriptors) tables of the volume.</para> + </listitem> + <listitem> + <para><emphasis role="bold">callpool</emphasis> - Displays the pending calls of the volume.</para> + </listitem> + </itemizedlist> + <para><emphasis role="bold">To display volume status </emphasis></para> + <itemizedlist> + <listitem> + <para>Display information about a specific volume using the following command:</para> + <para><command># gluster volume status [all|<replaceable>VOLNAME</replaceable> [<replaceable>BRICKNAME</replaceable>]] [detail|clients|mem|inode|fd|callpool]</command> </para> + <para>For example, to display information about test-volume:</para> + <para><programlisting># gluster volume status test-volume +STATUS OF VOLUME: test-volume +BRICK PORT ONLINE PID +-------------------------------------------------------- +arch:/export/1 24009 Y 22445 +-------------------------------------------------------- +arch:/export/2 24010 Y 22450</programlisting></para> + </listitem> + <listitem> + <para>Display information about all volumes using the following command:</para> + <para><command># gluster volume status all</command> +</para> + <para><programlisting># gluster volume status all +STATUS OF VOLUME: volume-test +BRICK PORT ONLINE PID 
+-------------------------------------------------------- +arch:/export/4 24010 Y 22455 + +STATUS OF VOLUME: test-volume +BRICK PORT ONLINE PID +-------------------------------------------------------- +arch:/export/1 24009 Y 22445 +-------------------------------------------------------- +arch:/export/2 24010 Y 22450</programlisting></para> + </listitem> + <listitem> + <para>Display additional information about the bricks using the following command:</para> + <para><command># gluster volume status <replaceable>VOLNAME</replaceable> detail</command> +</para> + <para>For example, to display additional information about the bricks of test-volume:</para> + <para><programlisting># gluster volume status test-volume detail +STATUS OF VOLUME: test-volume +------------------------------------------- +Brick : arch:/export/1 +Port : 24009 +Online : Y +Pid : 16977 +File System : rootfs +Device : rootfs +Mount Options : rw +Disk Space Free : 13.8GB +Total Disk Space : 46.5GB +Inode Size : N/A +Inode Count : N/A +Free Inodes : N/A + +Number of Bricks: 1 +Bricks: +Brick: server:/brick6</programlisting></para> + </listitem> + <listitem> + <para>Display the list of clients accessing the volumes using the following command:</para> + <para><command># gluster volume status <replaceable>VOLNAME</replaceable> clients</command> +</para> + <para>For example, to display the list of clients connected to test-volume:</para> + <para><programlisting># gluster volume status test-volume clients +Brick : arch:/export/1 +Clients connected : 2 +Hostname Bytes Read BytesWritten +-------- --------- ------------ +127.0.0.1:1013 776 676 +127.0.0.1:1012 50440 51200</programlisting></para> + </listitem> + <listitem> + <para>Display the memory usage and memory pool details of the bricks using the following command:</para> + <para><command># gluster volume status <replaceable>VOLNAME</replaceable> mem</command> +</para> + <para>For example, to display the memory usage and memory pool details of the 
bricks of test-volume:</para> + <screen>Memory status for volume : test-volume +---------------------------------------------- +Brick : arch:/export/1 +Mallinfo +-------- +Arena : 434176 +Ordblks : 2 +Smblks : 0 +Hblks : 12 +Hblkhd : 40861696 +Usmblks : 0 +Fsmblks : 0 +Uordblks : 332416 +Fordblks : 101760 +Keepcost : 100400 + +Mempool Stats +------------- +Name HotCount ColdCount PaddedSizeof AllocCount MaxAlloc +---- -------- --------- ------------ ---------- -------- +test-volume-server:fd_t 0 16384 92 57 5 +test-volume-server:dentry_t 59 965 84 59 59 +test-volume-server:inode_t 60 964 148 60 60 +test-volume-server:rpcsvc_request_t 0 525 6372 351 2 +glusterfs:struct saved_frame 0 4096 124 2 2 +glusterfs:struct rpc_req 0 4096 2236 2 2 +glusterfs:rpcsvc_request_t 1 524 6372 2 1 +glusterfs:call_stub_t 0 1024 1220 288 1 +glusterfs:call_stack_t 0 8192 2084 290 2 +glusterfs:call_frame_t 0 16384 172 1728 6</screen> + </listitem> + <listitem> + <para>Display the inode tables of the volume using the following command:</para> + <para><command># gluster volume status <replaceable>VOLNAME</replaceable> inode</command> +</para> + <para>For example, to display the inode tables of the test-volume:</para> + <para><programlisting># gluster volume status test-volume inode +inode tables for volume test-volume +---------------------------------------------- +Brick : arch:/export/1 +Active inodes: +GFID Lookups Ref IA type +---- ------- --- ------- +6f3fe173-e07a-4209-abb6-484091d75499 1 9 2 +370d35d7-657e-44dc-bac4-d6dd800ec3d3 1 1 2 + +LRU inodes: +GFID Lookups Ref IA type +---- ------- --- ------- +80f98abe-cdcf-4c1d-b917-ae564cf55763 1 0 1 +3a58973d-d549-4ea6-9977-9aa218f233de 1 0 1 +2ce0197d-87a9-451b-9094-9baa38121155 1 0 2</programlisting></para> + </listitem> + <listitem> + <para>Display the open fd tables of the volume using the following command:</para> + <para><command># gluster volume status <replaceable>VOLNAME</replaceable> fd</command> +</para> + <para>For example, to 
display the open fd tables of the test-volume:</para> + <para><screen># gluster volume status test-volume fd + +FD tables for volume test-volume +---------------------------------------------- +Brick : arch:/export/1 +Connection 1: +RefCount = 0 MaxFDs = 128 FirstFree = 4 +FD Entry PID RefCount Flags +-------- --- -------- ----- +0 26311 1 2 +1 26310 3 2 +2 26310 1 2 +3 26311 3 2 + +Connection 2: +RefCount = 0 MaxFDs = 128 FirstFree = 0 +No open fds + +Connection 3: +RefCount = 0 MaxFDs = 128 FirstFree = 0 +No open fds</screen></para> + </listitem> + <listitem> + <para>Display the pending calls of the volume using the following command:</para> + <para><command># gluster volume status <replaceable>VOLNAME</replaceable> callpool</command> +</para> + <para>Each call has a call stack containing call frames.</para> + <para>For example, to display the pending calls of test-volume:</para> + <para><programlisting># gluster volume status test-volume callpool + +Pending calls for volume test-volume +---------------------------------------------- +Brick : arch:/export/1 +Pending calls: 2 +Call Stack1 + UID : 0 + GID : 0 + PID : 26338 + Unique : 192138 + Frames : 7 + Frame 1 + Ref Count = 1 + Translator = test-volume-server + Completed = No + Frame 2 + Ref Count = 0 + Translator = test-volume-posix + Completed = No + Parent = test-volume-access-control + Wind From = default_fsync + Wind To = FIRST_CHILD(this)->fops->fsync + Frame 3 + Ref Count = 1 + Translator = test-volume-access-control + Completed = No + Parent = repl-locks + Wind From = default_fsync + Wind To = FIRST_CHILD(this)->fops->fsync + Frame 4 + Ref Count = 1 + Translator = test-volume-locks + Completed = No + Parent = test-volume-io-threads + Wind From = iot_fsync_wrapper + Wind To = FIRST_CHILD (this)->fops->fsync + Frame 5 + Ref Count = 1 + Translator = test-volume-io-threads + Completed = No + Parent = test-volume-marker + Wind From = default_fsync + Wind To = FIRST_CHILD(this)->fops->fsync + Frame 6 + Ref Count = 1 + 
Translator = test-volume-marker + Completed = No + Parent = /export/1 + Wind From = io_stats_fsync + Wind To = FIRST_CHILD(this)->fops->fsync + Frame 7 + Ref Count = 1 + Translator = /export/1 + Completed = No + Parent = test-volume-server + Wind From = server_fsync_resume + Wind To = bound_xl->fops->fsync</programlisting></para> + </listitem> + </itemizedlist> + </section> +</chapter> diff --git a/doc/legacy/docbook/admin_setting_volumes.xml b/doc/legacy/docbook/admin_setting_volumes.xml new file mode 100644 index 00000000000..6a8468d5f11 --- /dev/null +++ b/doc/legacy/docbook/admin_setting_volumes.xml @@ -0,0 +1,325 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ +<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent"> +%BOOK_ENTITIES; +]> +<chapter id="chap-Administration_Guide-Setting_Volumes"> + <title>Setting up GlusterFS Server Volumes</title> + <para>A volume is a logical collection of bricks where each brick is an export directory on a server in the trusted storage pool. Most of the gluster management operations are performed on the volume. </para> + <para>To create a new volume in your storage environment, specify the bricks that comprise the volume. After you have created a new volume, you must start it before attempting to mount it. </para> + <itemizedlist> + <listitem> + <para>Volumes of the following types can be created in your storage environment: </para> + <itemizedlist> + <listitem> + <para>Distributed - Distributed volumes distributes files throughout the bricks in the volume. You can use distributed volumes where the requirement is to scale storage and the redundancy is either not important or is provided by other hardware/software layers. 
For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed"/> .</para> + </listitem> + <listitem> + <para>Replicated – Replicated volumes replicates files across bricks in the volume. You can use replicated volumes in environments where high-availability and high-reliability are critical. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Replicated"/>.</para> + </listitem> + <listitem> + <para>Striped – Striped volumes stripes data across bricks in the volume. For best results, you should use striped volumes only in high concurrency environments accessing very large files. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Striped"/>.</para> + </listitem> + <listitem> + <para>Distributed Striped - Distributed striped volumes stripe data across two or more nodes in the cluster. You should use distributed striped volumes where the requirement is to scale storage and in high concurrency environments accessing very large files is critical. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed_Striped"/>.</para> + </listitem> + <listitem> + <para>Distributed Replicated - Distributed replicated volumes distributes files across replicated bricks in the volume. You can use distributed replicated volumes in environments where the requirement is to scale storage and high-reliability is critical. Distributed replicated volumes also offer improved read performance in most environments. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed_Replicated"/>. </para> + </listitem> + <listitem> + <para>Distributed Striped Replicated – Distributed striped replicated volumes distributes striped data across replicated bricks in the cluster. 
For best results, you should use distributed striped replicated volumes in highly concurrent environments where parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed_Striped_Replicated"/>. +</para> + </listitem> + <listitem> + <para>Striped Replicated – Striped replicated volumes stripes data across replicated bricks in the cluster. For best results, you should use striped replicated volumes in highly concurrent environments where there is parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads. For more +information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Striped_Replicated"/>.</para> + </listitem> + </itemizedlist> + </listitem> + </itemizedlist> + <para><emphasis role="bold">To create a new volume </emphasis></para> + <itemizedlist> + <listitem> + <para>Create a new volume :</para> + <para><command># gluster volume create<replaceable> NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable> | replica <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp, rdma] <replaceable>NEW-BRICK1 NEW-BRICK2 NEW-BRICK3...</replaceable></command></para> + <para>For example, to create a volume called test-volume consisting of server3:/exp3 and server4:/exp4:</para> + <para><programlisting># gluster volume create test-volume server3:/exp3 server4:/exp4 +Creation of test-volume has been successful +Please start the volume to access data.</programlisting></para> + </listitem> + </itemizedlist> + <section id="sect-Administration_Guide-Setting_Volumes-Distributed"> + <title>Creating Distributed Volumes</title> + <para>In a distributed volumes files are spread randomly across the bricks in the volume. 
Use distributed volumes where you need to scale storage and redundancy is either not important or is provided by other hardware/software layers. </para> + <para><note> + <para>Disk/server failure in distributed volumes can result in a serious loss of data because directory contents are spread randomly across the bricks in the volume. </para> + </note></para> + <figure> + <title>Illustration of a Distributed Volume</title> + <mediaobject> + <imageobject> + <imagedata fileref="images/Distributed_Volume.png"/> + </imageobject> + </mediaobject> + </figure> + <para><emphasis role="bold">To create a distributed volume</emphasis></para> + <orderedlist> + <listitem> + <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para> + </listitem> + <listitem> + <para>Create the distributed volume:</para> + <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para> + <para>For example, to create a distributed volume with four storage servers using tcp:</para> + <para><programlisting># gluster volume create test-volume server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 +Creation of test-volume has been successful +Please start the volume to access data.</programlisting></para> + <para>(Optional) You can display the volume information:</para> + <para><programlisting># gluster volume info +Volume Name: test-volume +Type: Distribute +Status: Created +Number of Bricks: 4 +Transport-type: tcp +Bricks: +Brick1: server1:/exp1 +Brick2: server2:/exp2 +Brick3: server3:/exp3 +Brick4: server4:/exp4</programlisting></para> + <para>For example, to create a distributed volume with four storage servers over InfiniBand:</para> + <para><programlisting># gluster volume create test-volume transport rdma server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 +Creation of test-volume has been successful 
+Please start the volume to access data.</programlisting></para> + <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para> + <para><note> + <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para> + </note></para> + </listitem> + </orderedlist> + </section> + <section id="sect-Administration_Guide-Setting_Volumes-Replicated"> + <title>Creating Replicated Volumes </title> + <para>Replicated volumes create copies of files across multiple bricks in the volume. You can use replicated volumes in environments where high-availability and high-reliability are critical. </para> + <para><note> + <para>The number of bricks should be equal to the replica count for a replicated volume. +To protect against server and disk failures, it is recommended that the bricks of the volume are from different servers. 
</para> + </note></para> + <figure> + <title>Illustration of a Replicated Volume</title> + <mediaobject> + <imageobject> + <imagedata fileref="images/Replicated_Volume.png"/> + </imageobject> + </mediaobject> + </figure> + <para><emphasis role="bold">To create a replicated volume </emphasis></para> + <orderedlist> + <listitem> + <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para> + </listitem> + <listitem> + <para>Create the replicated volume:</para> + <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [replica <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para> + <para>For example, to create a replicated volume with two storage servers:</para> + <para><programlisting># gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2 +Creation of test-volume has been successful +Please start the volume to access data.</programlisting></para> + <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para> + <para><note> + <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para> + </note></para> + </listitem> + </orderedlist> + </section> + <section id="sect-Administration_Guide-Setting_Volumes-Striped"> + <title>Creating Striped Volumes</title> + <para>Striped volumes stripes data across bricks in the volume. 
For best results, you should use striped volumes only in high concurrency environments accessing very large files.</para> + <para><note> + <para>The number of bricks should be equal to the stripe count for a striped volume. </para> + </note></para> + <figure> + <title>Illustration of a Striped Volume</title> + <mediaobject> + <imageobject> + <imagedata fileref="images/Striped_Volume.png"/> + </imageobject> + </mediaobject> + </figure> + <para><emphasis role="bold">To create a striped volume </emphasis></para> + <orderedlist> + <listitem> + <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para> + </listitem> + <listitem> + <para>Create the striped volume:</para> + <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para> + <para>For example, to create a striped volume across two storage servers:</para> + <para><programlisting># gluster volume create test-volume stripe 2 transport tcp server1:/exp1 server2:/exp2 +Creation of test-volume has been successful +Please start the volume to access data.</programlisting></para> + <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para> + <para><note> + <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. 
</para> + </note></para> + </listitem> + </orderedlist> + </section> + <section id="sect-Administration_Guide-Setting_Volumes-Distributed_Striped"> + <title>Creating Distributed Striped Volumes </title> + <para>Distributed striped volumes stripes files across two or more nodes in the cluster. For best results, you should use distributed striped volumes where the requirement is to scale storage and in high concurrency environments accessing very large files is critical.</para> + <para><note> + <para>The number of bricks should be a multiple of the stripe count for a distributed striped volume. </para> + </note></para> + <figure> + <title>Illustration of a Distributed Striped Volume</title> + <mediaobject> + <imageobject> + <imagedata fileref="images/Distributed_Striped_Volume.png"/> + </imageobject> + </mediaobject> + </figure> + <para><emphasis role="bold">To create a distributed striped volume </emphasis></para> + <orderedlist> + <listitem> + <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para> + </listitem> + <listitem> + <para>Create the distributed striped volume:</para> + <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para> + <para>For example, to create a distributed striped volume across eight storage servers:</para> + <para><programlisting># gluster volume create test-volume stripe 4 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6 server7:/exp7 server8:/exp8 +Creation of test-volume has been successful +Please start the volume to access data.</programlisting></para> + <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. 
For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para> + <para><note> + <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para> + </note></para> + </listitem> + </orderedlist> + </section> + <section id="sect-Administration_Guide-Setting_Volumes-Distributed_Replicated"> + <title>Creating Distributed Replicated Volumes </title> + <para>Distributes files across replicated bricks in the volume. You can use distributed replicated volumes in environments where the requirement is to scale storage and high-reliability is critical. Distributed replicated volumes also offer improved read performance in most environments.</para> + <para><note> + <para>The number of bricks should be a multiple of the replica count for a distributed replicated volume. Also, the order in which bricks are specified has a great effect on data protection. Each replica_count consecutive bricks in the list you give will form a replica set, with all replica sets combined into a volume-wide distribute set. To make sure that replica-set members are not placed on the same node, list the first brick on every server, then the second brick on every server in the same order, and so on. 
</para> + </note></para> + <figure> + <title>Illustration of a Distributed Replicated Volume</title> + <mediaobject> + <imageobject> + <imagedata fileref="images/Distributed_Replicated_Volume.png"/> + </imageobject> + </mediaobject> + </figure> + <para><emphasis role="bold">To create a distributed replicated volume </emphasis></para> + <orderedlist> + <listitem> + <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para> + </listitem> + <listitem> + <para>Create the distributed replicated volume:</para> + <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [replica <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para> + <para>For example, four node distributed (replicated) volume with a two-way mirror: +</para> + <para><programlisting># gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 +Creation of test-volume has been successful +Please start the volume to access data.</programlisting></para> + <para>For example, to create a six node distributed (replicated) volume with a two-way mirror:</para> + <para><programlisting># gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6 +Creation of test-volume has been successful +Please start the volume to access data.</programlisting></para> + <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. 
For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para> + <para><note> + <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para> + </note></para> + </listitem> + </orderedlist> + </section> + <section id="sect-Administration_Guide-Setting_Volumes-Distributed_Striped_Replicated"> + <title>Creating Distributed Striped Replicated Volumes </title> + <para>Distributed striped replicated volumes distribute striped data across replicated bricks in the cluster. For best results, you should use distributed striped replicated volumes in highly concurrent environments where parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads. </para> + <para><note> + <para>The number of bricks should be a multiple of the stripe count and the replica count for +a distributed striped replicated volume. 
+ </para> + </note></para> + <para><emphasis role="bold">To create a distributed striped replicated volume</emphasis> +</para> + <orderedlist> + <listitem> + <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para> + </listitem> + <listitem> + <para>Create a distributed striped replicated volume using the following command:</para> + <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [replica <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para> + <para>For example, to create a distributed replicated striped volume across eight storage servers: +</para> + <para><programlisting># gluster volume create test-volume stripe 2 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6 server7:/exp7 server8:/exp8 +Creation of test-volume has been successful +Please start the volume to access data.</programlisting></para> + <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para> + <para><note> + <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para> + </note></para> + </listitem> + </orderedlist> + </section> + <section id="sect-Administration_Guide-Setting_Volumes-Striped_Replicated"> + <title>Creating Striped Replicated Volumes </title> + <para>Striped replicated volumes stripes data across replicated bricks in the cluster. 
For best results, you should use striped replicated volumes in highly concurrent environments where there is parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads.</para> + <para><note> + <para>The number of bricks should be a multiple of the replicate count and stripe count for a +striped replicated volume. +</para> + </note></para> + <figure> + <title>Illustration of a Striped Replicated Volume</title> + <mediaobject> + <imageobject> + <imagedata fileref="images/Striped_Replicated_Volume.png"/> + </imageobject> + </mediaobject> + </figure> + <para><emphasis role="bold">To create a striped replicated volume</emphasis> +</para> + <orderedlist> + <listitem> + <para>Create a trusted storage pool consisting of the storage servers that will comprise the volume.</para> + <para>For more information, see <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para> + </listitem> + <listitem> + <para>Create a striped replicated volume :</para> + <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [replica <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para> + <para>For example, to create a striped replicated volume across four storage servers: + +</para> + <para><programlisting># gluster volume create test-volume stripe 2 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 +Creation of test-volume has been successful +Please start the volume to access data.</programlisting></para> + <para>To create a striped replicated volume across six storage servers: +</para> + <para><programlisting># gluster volume create test-volume stripe 3 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6 +Creation of test-volume has been successful 
+Please start the volume to access data.</programlisting></para> + <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para> + <para><note> + <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para> + </note></para> + </listitem> + </orderedlist> + </section> + <section id="sect-Administration_Guide-Setting_Volumes-Starting"> + <title>Starting Volumes </title> + <para>You must start your volumes before you try to mount them. </para> + <para><emphasis role="bold">To start a volume </emphasis></para> + <itemizedlist> + <listitem> + <para>Start a volume:</para> + <para><command># gluster volume start <replaceable>VOLNAME</replaceable></command></para> + <para>For example, to start test-volume:</para> + <para><programlisting># gluster volume start test-volume +Starting test-volume has been successful</programlisting></para> + </listitem> + </itemizedlist> + </section> +</chapter> diff --git a/doc/legacy/docbook/admin_settingup_clients.xml b/doc/legacy/docbook/admin_settingup_clients.xml new file mode 100644 index 00000000000..22979acf477 --- /dev/null +++ b/doc/legacy/docbook/admin_settingup_clients.xml @@ -0,0 +1,511 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. 
--><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ +<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent"> +%BOOK_ENTITIES; +]> +<chapter id="chap-Administration_Guide-GlusterFS_Client"> + <title>Accessing Data - Setting Up GlusterFS Client</title> + <para>You can access gluster volumes in multiple ways. You can use the Gluster Native Client method for high concurrency, performance and transparent failover in GNU/Linux clients. You can also use NFS v3 to access gluster volumes. Extensive testing has been done on GNU/Linux clients and on NFS implementations in other operating systems, such as FreeBSD and Mac OS X, as well as Windows 7 (Professional and Up) and Windows Server 2003. Other NFS client implementations may work with the gluster NFS server.</para> + <para>You can use CIFS to access volumes when using Microsoft Windows as well as SAMBA clients. For this access method, Samba packages need to be present on the client side. </para> + <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Native"> + <title>Gluster Native Client</title> + <para>The Gluster Native Client is a FUSE-based client running in user space. Gluster Native Client is the recommended method for accessing volumes when high concurrency and high write performance is required.</para> + <para>This section introduces the Gluster Native Client and explains how to install the software on client machines. This section also describes how to mount volumes on clients (both manually and automatically) and how to verify that the volume has mounted successfully. 
</para> + <section> + <title>Installing the Gluster Native Client</title> + <para>Before you begin installing the Gluster Native Client, you need to verify that the FUSE module is loaded on the client and has access to the required modules as follows: </para> + <orderedlist> + <listitem> + <para>Add the FUSE loadable kernel module (LKM) to the Linux kernel:</para> + <para><command># modprobe fuse</command></para> + </listitem> + <listitem> + <para>Verify that the FUSE module is loaded:</para> + <para><command># dmesg | grep -i fuse </command></para> + <para><command>fuse init (API version 7.13)</command></para> + </listitem> + </orderedlist> + <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Native-RPM"> + <title>Installing on Red Hat Package Manager (RPM) Distributions </title> + <para>To install Gluster Native Client on RPM distribution-based systems</para> + <orderedlist> + <listitem> + <para>Install required prerequisites on the client using the following command:</para> + <para><command>$ sudo yum -y install openssh-server wget fuse fuse-libs openib libibverbs</command></para> + </listitem> + <listitem> + <para>Ensure that TCP and UDP ports 24007 and 24008 are open on all Gluster servers. Apart from these ports, you need to open one port for each brick starting from port 24009. For example: if you have five bricks, you need to have ports 24009 to 24013 open.</para> + <para>You can use the following chains with iptables:</para> + <para><code>$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24007:24008 -j ACCEPT </code></para> + <para><code>$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24009:24014 -j ACCEPT</code></para> + <para><note> + <para>If you already have iptable chains, make sure that the above ACCEPT rules precede the DROP rules. 
This can be achieved by providing a lower rule number than the DROP rule.</para> + </note></para> + </listitem> + <listitem> + <para>Download the latest glusterfs, glusterfs-fuse, and glusterfs-rdma RPM files to each client. The glusterfs package contains the Gluster Native Client. The glusterfs-fuse package contains the FUSE translator required for mounting on client systems and the glusterfs-rdma packages contain OpenFabrics verbs RDMA module for Infiniband.</para> + <para>You can download the software at <ulink url="http://bits.gluster.com/gluster/glusterfs/3.3.0qa30/x86_64/"/>.</para> + </listitem> + <listitem> + <para>Install Gluster Native Client on the client.</para> + <para><command>$ sudo rpm -i glusterfs-3.3.0qa30-1.x86_64.rpm </command></para> + <para><command>$ sudo rpm -i glusterfs-fuse-3.3.0qa30-1.x86_64.rpm </command></para> + <para><command>$ sudo rpm -i glusterfs-rdma-3.3.0qa30-1.x86_64.rpm</command></para> + <para><note> + <para>The RDMA module is only required when using Infiniband.</para> + </note></para> + </listitem> + </orderedlist> + </section> + <section condition="gfs"> + <title>Installing on Debian-based Distributions</title> + <para>To install Gluster Native Client on Debian-based distributions</para> + <orderedlist> + <listitem> + <para>Install OpenSSH Server on each client using the following command:</para> + <para><command>$ sudo apt-get install openssh-server vim wget</command></para> + </listitem> + <listitem> + <para>Download the latest GlusterFS .deb file and checksum to each client.</para> + <para>You can download the software at <ulink url="http://www.gluster.org/download/"/>.</para> + </listitem> + <listitem> + <para>For each .deb file, get the checksum (using the following command) and compare it against the checksum for that file in the md5sum file.</para> + <para> +<command>$ md5sum GlusterFS_DEB_file.deb </command></para> + <para>The md5sum of the packages is available at: <ulink 
url="http://download.gluster.com/pub/gluster/glusterfs"/></para> + </listitem> + <listitem> + <para>Uninstall GlusterFS v3.1 (or an earlier version) from the client using the following command: +</para> + <para><command>$ sudo dpkg -r glusterfs </command></para> + <para>(Optional) Run <command>$ sudo dpkg --purge glusterfs </command>to purge the configuration files.</para> + </listitem> + <listitem> + <para>Install Gluster Native Client on the client using the following command: +</para> + <para><command>$ sudo dpkg -i GlusterFS_DEB_file </command></para> + <para>For example: +</para> + <para><command>$ sudo dpkg -i glusterfs-3.3.x.deb </command></para> + </listitem> + <listitem> + <para>Ensure that TCP and UDP ports 24007 and 24008 are open on all Gluster servers. Apart from these ports, you need to open one port for each brick starting from port 24009. For example: if you have five bricks, you need to have ports 24009 to 24013 open. +</para> + <para>You can use the following chains with iptables: +</para> + <para><code>$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24007:24008 -j ACCEPT </code></para> + <para><code>$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24009:24014 -j ACCEPT</code></para> + <para><note> + <para>If you already have iptable chains, make sure that the above ACCEPT rules precede the DROP rules. This can be achieved by providing a lower rule number than the DROP rule.</para> + </note></para> + </listitem> + </orderedlist> + </section> + <section> + <title>Performing a Source Installation</title> + <para>To build and install Gluster Native Client from the source code</para> + <orderedlist> + <listitem> + <para>Create a new directory using the following commands:</para> + <para><command># mkdir glusterfs </command></para> + <para><command># cd glusterfs</command></para> + </listitem> + <listitem> + <para>Download the source code. 
+</para> + <para>You can download the source at <ulink url="http://www.gluster.org/download/"/>.</para> + </listitem> + <listitem> + <para>Extract the source code using the following command: +</para> + <para><command># tar -xvzf SOURCE-FILE </command></para> + </listitem> + <listitem> + <para>Run the configuration utility using the following command: +</para> + <para><code># ./configure </code></para> + <para><code>GlusterFS configure summary </code></para> + <para><code>================== </code></para> + <para><code>FUSE client : yes </code></para> + <para><code>Infiniband verbs : yes </code></para> + <para><code>epoll IO multiplex : yes </code></para> + <para><code>argp-standalone : no </code></para> + <para><code>fusermount : no </code></para> + <para><code>readline : yes</code></para> + <para>The configuration summary shows the components that will be built with Gluster Native Client.</para> + </listitem> + <listitem> + <para>Build the Gluster Native Client software using the following commands: +</para> + <para><command># make </command></para> + <para><command># make install</command></para> + </listitem> + <listitem> + <para>Verify that the correct version of Gluster Native Client is installed, using the following command: +</para> + <para><command># glusterfs --version</command></para> + </listitem> + </orderedlist> + </section> + </section> + <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Mounting_Volumes"> + <title>Mounting Volumes</title> + <para>After installing the Gluster Native Client, you need to mount Gluster volumes to access data. 
There are two methods you can choose: </para> + <itemizedlist> + <listitem> + <para><xref linkend="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Manuall"/></para> + </listitem> + <listitem> + <para><xref linkend="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Automatic"/></para> + </listitem> + </itemizedlist> + <para>After mounting a volume, you can test the mounted volume using the procedure described in <xref linkend="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Native-Testing"/>. </para> + <para><note> + <para>Server names selected during creation of Volumes should be resolvable in the client machine. You can use appropriate /etc/hosts entries or DNS server to resolve server names to IP addresses. </para> + </note></para> + <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Manuall"> + <title>Manually Mounting Volumes</title> + <para>To manually mount a Gluster volume </para> + <itemizedlist> + <listitem> + <para>To mount a volume, use the following command: +</para> + <para><command># mount -t glusterfs HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR</command> +</para> + <para>For example: +</para> + <para><command># mount -t glusterfs server1:/test-volume /mnt/glusterfs</command></para> + <note> + <para>The server specified in the mount command is only used to fetch the gluster configuration volfile describing the volume name. Subsequently, the client will communicate directly with the servers mentioned in the volfile (which might not even include the one used for mount). + +</para> + <para>If you see a usage message like "Usage: mount.glusterfs", mount usually requires you to create a directory to be used as the mount point. 
Run "mkdir /mnt/glusterfs" before you attempt to run the mount command listed above.</para> + </note> + </listitem> + </itemizedlist> + <para><emphasis role="bold">Mounting Options</emphasis></para> + <para>You can specify the following options when using the <command>mount -t glusterfs</command> command. Note that you need to separate all options with commas. + +</para> + <para>backupvolfile-server=server-name</para> + <para>volfile-max-fetch-attempts=number of attempts</para> + <para>log-level=loglevel +</para> + <para>log-file=logfile +</para> + <para>transport=transport-type +</para> + <para>direct-io-mode=[enable|disable] + +</para> + <para>For example: +</para> + <para><code># mount -t glusterfs -o backupvolfile-server=volfile_server2,volfile-max-fetch-attempts=2,log-level=WARNING,log-file=/var/log/gluster.log server1:/test-volume /mnt/glusterfs</code></para> + <para>If the <option>backupvolfile-server</option> option is added while mounting the fuse client, then when the first +volfile server fails, the server specified in the <option>backupvolfile-server</option> option is used as the volfile server to mount +the client.</para> + <para>In the <code>volfile-max-fetch-attempts=X</code> option, specify the number of attempts to fetch volume files while mounting a volume. This option is useful when you mount a server with multiple IP addresses or when round-robin DNS is configured for the server-name. </para> + </section> + <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Automatic" dir="lro"> + <title>Automatically Mounting Volumes</title> + <para>You can configure your system to automatically mount the Gluster volume each time your system starts. </para> + <para>The server specified in the mount command is only used to fetch the gluster configuration volfile describing the volume name. Subsequently, the client will communicate directly with the servers mentioned in the volfile (which might not even include the one used for mount). 
</para> + <para><emphasis role="bold">To automatically mount a Gluster volume</emphasis></para> + <itemizedlist> + <listitem> + <para>To mount a volume, edit the /etc/fstab file and add the following line: +</para> + <para><command>HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR glusterfs defaults,_netdev 0 0 </command></para> + <para>For example: +</para> + <para><code>server1:/test-volume /mnt/glusterfs glusterfs defaults,_netdev 0 0</code></para> + </listitem> + </itemizedlist> + <para><emphasis role="bold">Mounting Options </emphasis></para> + <para>You can specify the following options when updating the /etc/fstab file. Note that you need to separate all options with commas. + +</para> + <para>log-level=loglevel +</para> + <para>log-file=logfile +</para> + <para>transport=transport-type +</para> + <para>direct-io-mode=[enable|disable] + +</para> + <para>For example: +</para> + <para><code>HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR glusterfs defaults,_netdev,log-level=WARNING,log-file=/var/log/gluster.log 0 0 </code></para> + </section> + <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Native-Testing"> + <title>Testing Mounted Volumes</title> + <para>To test mounted volumes</para> + <itemizedlist> + <listitem> + <para>Use the following command: +</para> + <para><command># mount </command></para> + <para>If the gluster volume was successfully mounted, the output of the mount command on the client will be similar to this example: + +</para> + <para><code>server1:/test-volume on /mnt/glusterfs type fuse.glusterfs (rw,allow_other,default_permissions,max_read=131072</code></para> + </listitem> + </itemizedlist> + <itemizedlist> + <listitem> + <para>Use the following command: +</para> + <para><command># df</command> +</para> + <para>The output of df command on the client will display the aggregated storage space from all the bricks in a volume similar to this example: +</para> + <para><code># df -h /mnt/glusterfs Filesystem Size Used Avail Use% Mounted on 
server1:/test-volume 28T 22T 5.4T 82% /mnt/glusterfs</code></para> + </listitem> + <listitem> + <para>Change to the directory and list the contents by entering the following: +</para> + <para><command># cd MOUNTDIR </command></para> + <para><command># ls</command></para> + </listitem> + <listitem> + <para>For example,</para> + <para><code># cd /mnt/glusterfs </code></para> + <para><code># ls</code></para> + </listitem> + </itemizedlist> + </section> + </section> + </section> + <section id="sect-Administration_Guide-GlusterFS_Client-NFS"> + <title>NFS</title> + <para>You can use NFS v3 to access gluster volumes. Extensive testing has been done on GNU/Linux clients and on NFS implementations in other operating systems, such as FreeBSD and Mac OS X, as well as Windows 7 (Professional and Up) and Windows Server 2003. Other NFS client implementations may work with the gluster NFS server implementation. </para> + <para>GlusterFS now includes network lock manager (NLM) v4. NLM enables applications on NFSv3 clients to do record locking on files on the NFS server. It is started automatically whenever the NFS server is run.</para> + <para condition="gfs">You must install the nfs-common package on both servers and clients (only for Debian-based distributions).</para> + <para>This section describes how to use NFS to mount Gluster volumes (both manually and automatically) and how to verify that the volume has been mounted successfully. 
</para> + <section> + <title>Using NFS to Mount Volumes</title> + <para>You can use either of the following methods to mount Gluster volumes: </para> + <para><itemizedlist> + <listitem> + <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-NFS-Manual"/></para> + </listitem> + <listitem> + <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-NFS-Automatic"/></para> + </listitem> + </itemizedlist></para> + <para condition="gfs"><emphasis role="bold">Prerequisite</emphasis>: Install nfs-common package on both servers and clients (only for Debian-based distribution), using the following command: </para> + <para condition="gfs"><command>$ sudo aptitude install nfs-common </command></para> + <para>After mounting a volume, you can test the mounted volume using the procedure described in <xref linkend="sect-Administration_Guide-GlusterFS_Client-NFS-Testing"/>. </para> + <section id="sect-Administration_Guide-GlusterFS_Client-NFS-Manual"> + <title>Manually Mounting Volumes Using NFS </title> + <para>To manually mount a Gluster volume using NFS </para> + <itemizedlist> + <listitem> + <para>To mount a volume, use the following command: +</para> + <para><command># mount -t nfs -o vers=3 HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR</command> +</para> + <para>For example:</para> + <para><command># mount -t nfs -o vers=3 server1:/test-volume /mnt/glusterfs</command></para> + <para><note> + <para> Gluster NFS server does not support UDP. If the NFS client you are using defaults to connecting using UDP, the following message appears: +</para> + <para><code>requested NFS version or transport protocol is not supported</code>. 
</para> + </note></para> + <para><emphasis role="bold">To connect using TCP</emphasis></para> + </listitem> + <listitem> + <para>Add the following option to the mount command: +</para> + <para><command>-o mountproto=tcp </command></para> + <para>For example: +</para> + <para><command># mount -o mountproto=tcp -t nfs server1:/test-volume /mnt/glusterfs</command></para> + </listitem> + </itemizedlist> + <para><emphasis role="bold">To mount Gluster NFS server from a Solaris client </emphasis></para> + <itemizedlist> + <listitem> + <para>Use the following command: +</para> + <para><command># mount -o proto=tcp,vers=3 nfs://HOSTNAME-OR-IPADDRESS:38467/VOLNAME MOUNTDIR</command></para> + <para> +For example:</para> + <para><command> # mount -o proto=tcp,vers=3 nfs://server1:38467/test-volume /mnt/glusterfs</command></para> + </listitem> + </itemizedlist> + </section> + <section id="sect-Administration_Guide-GlusterFS_Client-NFS-Automatic"> + <title>Automatically Mounting Volumes Using NFS</title> + <para>You can configure your system to automatically mount Gluster volumes using NFS each time the system starts.</para> + <para><emphasis role="bold">To automatically mount a Gluster volume using NFS </emphasis></para> + <itemizedlist> + <listitem> + <para>To mount a volume, edit the /etc/fstab file and add the following line:</para> + <para><command>HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR nfs defaults,_netdev,vers=3 0 0</command></para> + <para>For example,</para> + <para><command>server1:/test-volume /mnt/glusterfs nfs defaults,_netdev,vers=3 0 0</command></para> + <note> + <para>Gluster NFS server does not support UDP. 
If the NFS client you are using defaults to connecting using UDP, the following message appears: </para> + <para><command>requested NFS version or transport protocol is not supported.</command></para> + </note> + <para/> + <para>To connect using TCP </para> + </listitem> + <listitem> + <para>Add the following entry in /etc/fstab file :</para> + <para><command>HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR nfs defaults,_netdev,mountproto=tcp 0 0</command></para> + <para>For example,</para> + <para><command>server1:/test-volume /mnt/glusterfs nfs defaults,_netdev,mountproto=tcp 0 0</command></para> + </listitem> + </itemizedlist> + <para><emphasis role="bold">To automount NFS mounts</emphasis></para> + <para>Gluster supports *nix standard method of automounting NFS mounts. Update the /etc/auto.master and /etc/auto.misc and restart the autofs service. After that, whenever a user or process attempts to access the directory it will be mounted in the background. </para> + </section> + <section id="sect-Administration_Guide-GlusterFS_Client-NFS-Testing"> + <title>Testing Volumes Mounted Using NFS</title> + <para>You can confirm that Gluster directories are mounting successfully. 
</para> + <para><emphasis role="bold">To test mounted volumes</emphasis></para> + <itemizedlist> + <listitem> + <para>Use the mount command by entering the following:</para> + <para><command># mount</command></para> + <para>For example, the output of the mount command on the client will display an entry like the following:</para> + <para><command>server1:/test-volume on /mnt/glusterfs type nfs (rw,vers=3,addr=server1)</command></para> + </listitem> + </itemizedlist> + <itemizedlist> + <listitem> + <para>Use the df command by entering the following:</para> + <para><command># df</command></para> + <para>For example, the output of df command on the client will display the aggregated storage space from all the bricks in a volume.</para> + <para><screen># df -h /mnt/glusterfs +Filesystem Size Used Avail Use% Mounted on +server1:/test-volume 28T 22T 5.4T 82% /mnt/glusterfs</screen></para> + </listitem> + <listitem> + <para>Change to the directory and list the contents by entering the following:</para> + <para><command># cd MOUNTDIR</command></para> + <para><command># ls</command></para> + <para>For example,</para> + <para><command> + <command># cd /mnt/glusterfs</command> + </command></para> + <para><command># ls</command></para> + </listitem> + </itemizedlist> + </section> + </section> + </section> + <section id="sect-Administration_Guide-GlusterFS_Client-CIFS"> + <title>CIFS</title> + <para>You can use CIFS to access to volumes when using Microsoft Windows as well as SAMBA clients. For this access method, Samba packages need to be present on the client side. 
You can export glusterfs mount point as the samba export, and then mount it using CIFS protocol.</para> + <para>This section describes how to mount CIFS shares on Microsoft Windows-based clients (both manually and automatically) and how to verify that the volume has mounted successfully.</para> + <para><note> + <para> CIFS access using the Mac OS X Finder is not supported, however, you can use the Mac OS X command line to access Gluster volumes using CIFS.</para> + </note></para> + <section> + <title>Using CIFS to Mount Volumes</title> + <para>You can use either of the following methods to mount Gluster volumes: </para> + <itemizedlist> + <listitem> + <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-CIFS-Manual"/></para> + </listitem> + <listitem> + <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-CIFS-Automatic"/></para> + </listitem> + </itemizedlist> + <para>After mounting a volume, you can test the mounted volume using the procedure described in <xref linkend="sect-Administration_Guide-GlusterFS_Client-CIFS-Testing"/>.</para> + <para>You can also use Samba for exporting Gluster Volumes through CIFS protocol.</para> + <section> + <title>Exporting Gluster Volumes Through Samba</title> + <para>We recommend you to use Samba for exporting Gluster volumes through the CIFS protocol. </para> + <para><emphasis role="bold">To export volumes through CIFS protocol </emphasis></para> + <orderedlist> + <listitem> + <para>Mount a Gluster volume. For more information on mounting volumes, see <xref linkend="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Mounting_Volumes"/>.</para> + </listitem> + <listitem> + <para>Setup Samba configuration to export the mount point of the Gluster volume.</para> + <para>For example, if a Gluster volume is mounted on /mnt/gluster, you must edit smb.conf file to enable exporting this through CIFS. 
Open smb.conf file in an editor and add the following lines for a simple configuration:</para> + <para>[glustertest] + </para> + <para> comment = For testing a Gluster volume exported through CIFS + </para> + <para> path = /mnt/glusterfs + </para> + <para> read only = no + </para> + <para> guest ok = yes</para> + </listitem> + </orderedlist> + <para>Save the changes and start the smb service using your system's init scripts (/etc/init.d/smb [re]start).</para> + <para><note> + <para>To be able to mount from any server in the trusted storage pool, you must repeat these steps on each Gluster node. For more advanced configurations, see the Samba documentation. </para> + </note></para> + </section> + <section id="sect-Administration_Guide-GlusterFS_Client-CIFS-Manual"> + <title>Manually Mounting Volumes Using CIFS </title> + <para>You can manually mount Gluster volumes using CIFS on Microsoft Windows-based client machines. </para> + <para><emphasis role="bold">To manually mount a Gluster volume using CIFS </emphasis></para> + <orderedlist> + <listitem> + <para>Using Windows Explorer, choose <emphasis role="bold">Tools > Map Network Drive…</emphasis> from the menu. The <emphasis role="bold">Map Network Drive </emphasis>window appears. </para> + </listitem> + <listitem> + <para>Choose the drive letter using the <emphasis role="bold">Drive</emphasis> drop-down list. </para> + </listitem> + <listitem> + <para>Click <emphasis role="bold">Browse</emphasis>, select the volume to map to the network drive, and click <emphasis role="bold">OK</emphasis>. 
</para> + </listitem> + <listitem> + <para>Click <emphasis role="bold">Finish.</emphasis></para> + </listitem> + </orderedlist> + <para>The network drive (mapped to the volume) appears in the Computer window.</para> + <para><emphasis role="bold">Alternatively, to manually mount a Gluster volume using CIFS.</emphasis></para> + <itemizedlist> + <listitem> + <para>Click <emphasis role="bold">Start > Run</emphasis> and enter the following:</para> + <para><command> + <code>\\SERVERNAME\VOLNAME</code> + </command></para> + <para>For example:</para> + <para><command> + <code>\\server1\test-volume</code> + </command></para> + </listitem> + </itemizedlist> + </section> + <section id="sect-Administration_Guide-GlusterFS_Client-CIFS-Automatic"> + <title>Automatically Mounting Volumes Using CIFS</title> + <para>You can configure your system to automatically mount Gluster volumes using CIFS on Microsoft Windows-based clients each time the system starts.</para> + <para><emphasis role="bold">To automatically mount a Gluster volume using CIFS</emphasis></para> + <para>The network drive (mapped to the volume) appears in the Computer window and is reconnected each time the system starts.</para> + <orderedlist> + <listitem> + <para>Using Windows Explorer, choose <emphasis role="bold">Tools > Map Network Drive…</emphasis> from the menu. The <emphasis role="bold">Map Network Drive </emphasis>window appears. </para> + </listitem> + <listitem> + <para>Choose the drive letter using the <emphasis role="bold">Drive</emphasis> drop-down list. </para> + </listitem> + <listitem> + <para>Click <emphasis role="bold">Browse</emphasis>, select the volume to map to the network drive, and click <emphasis role="bold">OK</emphasis>. 
</para> + </listitem> + <listitem> + <para>Click the <emphasis role="bold">Reconnect</emphasis> at logon checkbox.</para> + </listitem> + <listitem> + <para>Click <emphasis role="bold">Finish.</emphasis></para> + </listitem> + </orderedlist> + </section> + <section id="sect-Administration_Guide-GlusterFS_Client-CIFS-Testing"> + <title>Testing Volumes Mounted Using CIFS</title> + <para>You can confirm that Gluster directories are mounting successfully by navigating to the directory using Windows Explorer. </para> + </section> + </section> + </section> +</chapter> diff --git a/doc/legacy/docbook/admin_start_stop_daemon.xml b/doc/legacy/docbook/admin_start_stop_daemon.xml new file mode 100644 index 00000000000..bdab0b8b608 --- /dev/null +++ b/doc/legacy/docbook/admin_start_stop_daemon.xml @@ -0,0 +1,56 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ +<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent"> +%BOOK_ENTITIES; +]> +<chapter id="chap-Administration_Guide-Start_Stop_Daemon"> + <title id="chap-Administration_Guide-Stop_Start_Daemon">Managing the glusterd Service</title> + <para>After installing GlusterFS, you must start glusterd service. 
The glusterd service serves as the Gluster elastic volume manager, overseeing glusterfs processes, and co-ordinating dynamic volume operations, such as adding and removing volumes across multiple storage servers non-disruptively.</para> + <para>This section describes how to start the glusterd service in the following ways: </para> + <itemizedlist> + <listitem> + <para><xref linkend="sect-Administration_Guide-Start_Stop_Daemon-Manually"/></para> + </listitem> + <listitem> + <para><xref linkend="sect-Administration_Guide-Start_Stop_Daemon-Automatically"/></para> + </listitem> + </itemizedlist> + <note> + <para>You must start glusterd on all GlusterFS servers.</para> + </note> + <section id="sect-Administration_Guide-Start_Stop_Daemon-Manually"> + <title>Starting and Stopping glusterd Manually</title> + <para>This section describes how to start and stop glusterd manually</para> + <itemizedlist> + <listitem> + <para>To start glusterd manually, enter the following command:</para> + <para><command># /etc/init.d/glusterd start </command></para> + </listitem> + <listitem> + <para>To stop glusterd manually, enter the following command: </para> + <para><command># /etc/init.d/glusterd stop</command></para> + </listitem> + </itemizedlist> + </section> + <section id="sect-Administration_Guide-Start_Stop_Daemon-Automatically"> + <title>Starting glusterd Automatically</title> + <para condition="gfs">This section describes how to configure the system to automatically start the glusterd service every time the system boots. 
</para> + <para condition="appliance">To automatically start the glusterd service every time the system boots, enter the following from the command line: </para> + <para condition="appliance"><command># chkconfig glusterd on </command></para> + <section condition="gfs"> + <title condition="gfs">Red Hat-based Systems</title> + <para>To configure Red Hat-based systems to automatically start the glusterd service every time the system boots, enter the following from the command line: </para> + <para><command># chkconfig glusterd on </command></para> + </section> + <section condition="gfs"> + <title condition="gfs">Debian-based Systems</title> + <para>To configure Debian-based systems to automatically start the glusterd service every time the system boots, enter the following from the command line:</para> + <para><command># update-rc.d glusterd defaults</command></para> + </section> + <section condition="gfs"> + <title condition="gfs">Systems Other than Red Hat and Debian</title> + <para>To configure systems other than Red Hat or Debian to automatically start the glusterd service every time the system boots, enter the following entry to the<emphasis role="italic"> /etc/rc.local</emphasis> file: </para> + <para><command># echo "glusterd" >> /etc/rc.local </command></para> + </section> + </section> +</chapter> diff --git a/doc/legacy/docbook/admin_storage_pools.xml b/doc/legacy/docbook/admin_storage_pools.xml new file mode 100644 index 00000000000..87b6320bd4b --- /dev/null +++ b/doc/legacy/docbook/admin_storage_pools.xml @@ -0,0 +1,57 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. 
--><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []> +<chapter id="chap-Administration_Guide-Storage-pool"> + <title>Setting up Trusted Storage Pools</title> + <para>Before you can configure a GlusterFS volume, you must create a trusted storage pool consisting of the storage servers that provide bricks to a volume. </para> + <para>A storage pool is a trusted network of storage servers. When you start the first server, the storage pool consists of that server alone. To add additional storage servers to the storage pool, you can use the probe command from a storage server that is already trusted. </para> + <para><note> + <para>Do not self-probe the first server/localhost.</para> + </note></para> + <para>The GlusterFS service must be running on all storage servers that you want to add to the storage pool. See <xref linkend="chap-Administration_Guide-Start_Stop_Daemon"/> for more information.</para> + <section id="sect-Administration_Guide-Storage_Pools-Adding_Servers"> + <title>Adding Servers to Trusted Storage Pool</title> + <para>To create a trusted storage pool, add servers to the trusted storage pool:</para> + <orderedlist> + <listitem> + <para>The hostnames used to create the storage pool must be resolvable by DNS.</para> + <para>To add a server to the storage pool:</para> + <para><command># gluster peer probe <replaceable>server</replaceable></command></para> + <para>For example, to create a trusted storage pool of four servers, add three servers to the storage pool from server1:</para> + <para><programlisting># gluster peer probe server2 +Probe successful + +# gluster peer probe server3 +Probe successful + +# gluster peer probe server4 +Probe successful +</programlisting></para> + </listitem> + <listitem> + <para>Verify the peer status from the first server using the following command:</para> + <para><programlisting># gluster peer status +Number of Peers: 3 + +Hostname: server2 +Uuid: 
5e987bda-16dd-43c2-835b-08b7d55e94e5 +State: Peer in Cluster (Connected) + +Hostname: server3 +Uuid: 1e0ca3aa-9ef7-4f66-8f15-cbc348f29ff7 +State: Peer in Cluster (Connected) + +Hostname: server4 +Uuid: 3e0caba-9df7-4f66-8e5d-cbc348f29ff7 +State: Peer in Cluster (Connected)</programlisting></para> + </listitem> + </orderedlist> + </section> + <section> + <title>Removing Servers from the Trusted Storage Pool</title> + <para>To remove a server from the storage pool:</para> + <para><command># gluster peer detach<replaceable> server</replaceable></command></para> + <para> For example, to remove server4 from the trusted storage pool:</para> + <para><programlisting># gluster peer detach server4 +Detach successful</programlisting></para> + </section> +</chapter> diff --git a/doc/legacy/docbook/admin_troubleshooting.xml b/doc/legacy/docbook/admin_troubleshooting.xml new file mode 100644 index 00000000000..af1259ada43 --- /dev/null +++ b/doc/legacy/docbook/admin_troubleshooting.xml @@ -0,0 +1,518 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []> +<!-- + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +--> +<chapter id="chap-Administration_Guide-Troubleshooting"> + <title>Troubleshooting GlusterFS </title> + <para>This section describes how to manage GlusterFS logs and most common troubleshooting scenarios +related to GlusterFS. 
+</para> + <section> + <title>Managing GlusterFS Logs </title> + <para>This section describes how to manage GlusterFS logs by performing the following operation: + +</para> + <itemizedlist> + <listitem> + <para>Rotating Logs +</para> + </listitem> + </itemizedlist> + <section> + <title>Rotating Logs </title> + <para>Administrators can rotate the log file in a volume, as needed. +</para> + <para><emphasis role="bold">To rotate a log file </emphasis></para> + <itemizedlist> + <listitem> + <para>Rotate the log file using the following command: +</para> + <para><command># gluster volume log rotate <replaceable>VOLNAME</replaceable></command></para> + <para>For example, to rotate the log file on test-volume: +</para> + <programlisting># gluster volume log rotate test-volume +log rotate successful +</programlisting> + <note> + <para>When a log file is rotated, the contents of the current log file are moved to log-file- +name.epoch-time-stamp. +</para> + </note> + </listitem> + </itemizedlist> + </section> + </section> + <section> + <title>Troubleshooting Geo-replication </title> + <para>This section describes the most common troubleshooting scenarios related to GlusterFS Geo-replication. 
+</para> + <section> + <title>Locating Log Files </title> + <para>For every Geo-replication session, the following three log files are associated to it (four, if the slave is a +gluster volume): +</para> + <itemizedlist> + <listitem> + <para>Master-log-file - log file for the process which monitors the Master volume +</para> + </listitem> + <listitem> + <para>Slave-log-file - log file for process which initiates the changes in slave +</para> + </listitem> + <listitem> + <para>Master-gluster-log-file - log file for the maintenance mount point that Geo-replication module +uses to monitor the master volume +</para> + </listitem> + <listitem> + <para>Slave-gluster-log-file - is the slave's counterpart of it +</para> + </listitem> + </itemizedlist> + <para><emphasis role="bold">Master Log File</emphasis> +</para> + <para>To get the Master-log-file for geo-replication, use the following command: +</para> + <para><command>gluster volume geo-replication <code>MASTER SLAVE</code> config log-file</command> +</para> + <para>For example: +</para> + <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir config log-file </command></para> + <para><emphasis role="bold">Slave Log File </emphasis></para> + <para>To get the log file for Geo-replication on slave (glusterd must be running on slave machine), use the +following commands: +</para> + <orderedlist> + <listitem> + <para>On master, run the following command: +</para> + <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir config session-owner 5f6e5200-756f-11e0-a1f0-0800200c9a66 </command></para> + <para>Displays the session owner details. 
+</para> + </listitem> + <listitem> + <para>On slave, run the following command: +</para> + <para><command># gluster volume geo-replication /data/remote_dir config log-file /var/log/gluster/${session-owner}:remote-mirror.log </command></para> + </listitem> + <listitem> + <para>Replace the session owner details (output of Step 1) to the output of the Step 2 to get the +location of the log file. +</para> + <para><command>/var/log/gluster/5f6e5200-756f-11e0-a1f0-0800200c9a66:remote-mirror.log</command> +</para> + </listitem> + </orderedlist> + </section> + <section> + <title>Rotating Geo-replication Logs</title> + <para>Administrators can rotate the log file of a particular master-slave session, as needed. +When you run geo-replication's <command> log-rotate</command> command, the log file +is backed up with the current timestamp suffixed to the file +name and signal is sent to gsyncd to start logging to a new +log file.</para> + <para><emphasis role="bold">To rotate a geo-replication log file </emphasis></para> + <itemizedlist> + <listitem> + <para>Rotate log file for a particular master-slave session using the following command: +</para> + <para><command># gluster volume geo-replication <replaceable>master slave</replaceable> log-rotate</command> +</para> + <para>For example, to rotate the log file of master <filename>Volume1</filename> and slave <filename>example.com:/data/remote_dir</filename> +: +</para> + <programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir log rotate +log rotate successful</programlisting> + </listitem> + <listitem> + <para>Rotate log file for all sessions for a master volume using the following command: +</para> + <para><command># gluster volume geo-replication <replaceable>master</replaceable> log-rotate</command> +</para> + <para>For example, to rotate the log file of master <filename>Volume1</filename>: +</para> + <programlisting># gluster volume geo-replication Volume1 log rotate +log rotate 
successful</programlisting> + </listitem> + <listitem> + <para>Rotate log file for all sessions using the following command: +</para> + <para><command># gluster volume geo-replication log-rotate</command> +</para> + <para>For example, to rotate the log file for all sessions:</para> + <programlisting># gluster volume geo-replication log rotate +log rotate successful</programlisting> + </listitem> + </itemizedlist> + </section> + <section> + <title>Synchronization is not complete </title> + <para><emphasis role="bold">Description</emphasis>: GlusterFS Geo-replication did not synchronize the data completely but still the geo- +replication status displayed is OK. +</para> + <para><emphasis role="bold">Solution</emphasis>: You can enforce a full sync of the data by erasing the index and restarting GlusterFS Geo- +replication. After restarting, GlusterFS Geo-replication begins synchronizing all the data. All files are compared using checksum, which can be a lengthy and high resource utilization operation on large +data sets. If the error situation persists, contact Red Hat Support. +</para> + <para>For more information about erasing index, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/>. +</para> + </section> + <section> + <title>Issues in Data Synchronization </title> + <para><emphasis role="bold">Description</emphasis>: Geo-replication display status as OK, but the files do not get synced, only +directories and symlink gets synced with the following error message in the log: +</para> + <para><errortext>[2011-05-02 13:42:13.467644] E [master:288:regjob] GMaster: failed to sync ./some_file` </errortext></para> + <para><emphasis role="bold">Solution</emphasis>: Geo-replication invokes rsync v3.0.0 or higher on the host and the remote machine. You must verify if +you have installed the required version. 
+</para> + </section> + <section> + <title>Geo-replication status displays Faulty very often </title> + <para><emphasis role="bold">Description</emphasis>: Geo-replication displays status as faulty very often with a backtrace similar to +the following: +</para> + <para><errortext>2011-04-28 14:06:18.378859] E [syncdutils:131:log_raise_exception] <top>: FAIL: Traceback (most recent call last): File "/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py", line 152, in twraptf(*aa) File "/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 118, in listen rid, exc, res = recv(self.inf) File "/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 42, in recv return pickle.load(inf) EOFError </errortext></para> + <para><emphasis role="bold">Solution</emphasis>: This error indicates that the RPC communication between the master gsyncd module and slave +gsyncd module is broken and this can happen for various reasons. Check if it satisfies all the following +pre-requisites: +</para> + <itemizedlist> + <listitem> + <para>Password-less SSH is set up properly between the host and the remote machine. +</para> + </listitem> + <listitem> + <para>If FUSE is installed in the machine, because geo-replication module mounts the GlusterFS volume +using FUSE to sync data. +</para> + </listitem> + <listitem> + <para>If the <emphasis role="bold">Slave</emphasis> is a volume, check if that volume is started. +</para> + </listitem> + <listitem> + <para>If the Slave is a plain directory, verify if the directory has been created already with the +required permissions. +</para> + </listitem> + <listitem> + <para>If GlusterFS 3.2 or higher is not installed in the default location (in Master) and has been prefixed to be +installed in a custom location, configure the <command>gluster-command</command> for it to point to the exact +location. 
+</para> + </listitem> + <listitem> + <para>If GlusterFS 3.2 or higher is not installed in the default location (in slave) and has been prefixed to be +installed in a custom location, configure the <command>remote-gsyncd-command</command> for it to point to the +exact place where gsyncd is located. +</para> + </listitem> + </itemizedlist> + </section> + <section> + <title>Intermediate Master goes to Faulty State </title> + <para><emphasis role="bold">Description</emphasis>: In a cascading set-up, the intermediate master goes to faulty state with the following +log: +</para> + <para><errortext>raise RuntimeError ("aborting on uuid change from %s to %s" % \ RuntimeError: aborting on uuid change from af07e07c-427f-4586-ab9f- 4bf7d299be81 to de6b5040-8f4e-4575-8831-c4f55bd41154 </errortext></para> + <para><emphasis role="bold">Solution</emphasis>: In a cascading set-up the Intermediate master is loyal to the original primary master. The +above log means that the geo-replication module has detected change in primary master. +If this is the desired behavior, delete the config option volume-id in the session initiated from the +intermediate master. +</para> + </section> + </section> + <section> + <title>Troubleshooting POSIX ACLs </title> + <para>This section describes the most common troubleshooting issues related to POSIX ACLs. +</para> + <section> + <title>setfacl command fails with “setfacl: <file or directory name>: Operation not supported” error </title> + <para>You may face this error when the backend file systems in one of the servers is not mounted with +the "-o acl" option. The same can be confirmed by viewing the following error message in the log file +of the server "Posix access control list is not supported". +</para> + <para><emphasis role="bold">Solution</emphasis>: Remount the backend file system with "-o acl" option. For more information, see <xref linkend="sect-Administration_Guide-ACLs-Activating_ACLs-Server"/>. 
+</para> + </section> + </section> + <section> + <title>Troubleshooting Hadoop Compatible Storage </title> + <para>This section describes the most common troubleshooting issues related to Hadoop Compatible +Storage. + + </para> + <section id="sect-Administration_Guide-Troubleshooting-Test_Section_1"> + <title>Time Sync</title> + <para>Running a MapReduce job may throw exceptions if the time is out-of-sync on the hosts in the cluster. + + </para> + <para><emphasis role="bold">Solution</emphasis>: Sync the time on all hosts using the ntpd program. +</para> + </section> + </section> + <section> + <title>Troubleshooting NFS </title> + <para>This section describes the most common troubleshooting issues related to NFS. +</para> + <section> + <title>mount command on NFS client fails with “RPC Error: Program not registered” </title> + <para>Start portmap or rpcbind service on the NFS server. +</para> + <para>This error is encountered when the server has not started correctly. +</para> + <para>On most Linux distributions this is fixed by starting portmap: +</para> + <para><command>$ /etc/init.d/portmap start</command> +</para> + <para>On some distributions where portmap has been replaced by rpcbind, the following command is +required: +</para> + <para><command>$ /etc/init.d/rpcbind start </command></para> + <para>After starting portmap or rpcbind, gluster NFS server needs to be restarted. +</para> + </section> + <section> + <title>NFS server start-up fails with “Port is already in use” error in the log file </title> + <para>Another Gluster NFS server is running on the same machine. +</para> + <para>This error can arise in case there is already a Gluster NFS server running on the same machine. 
+This situation can be confirmed from the log file, if the following error lines exist: +</para> + <para><screen>[2010-05-26 23:40:49] E [rpc-socket.c:126:rpcsvc_socket_listen] rpc-socket: binding socket failed:Address already in use +[2010-05-26 23:40:49] E [rpc-socket.c:129:rpcsvc_socket_listen] rpc-socket: Port is already in use +[2010-05-26 23:40:49] E [rpcsvc.c:2636:rpcsvc_stage_program_register] rpc-service: could not create listening connection +[2010-05-26 23:40:49] E [rpcsvc.c:2675:rpcsvc_program_register] rpc-service: stage registration of program failed +[2010-05-26 23:40:49] E [rpcsvc.c:2695:rpcsvc_program_register] rpc-service: Program registration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465 +[2010-05-26 23:40:49] E [nfs.c:125:nfs_init_versions] nfs: Program init failed +[2010-05-26 23:40:49] C [nfs.c:531:notify] nfs: Failed to initialize protocols</screen></para> + <para>To resolve this error one of the Gluster NFS servers will have to be shutdown. At this time, +Gluster NFS server does not support running multiple NFS servers on the same machine. +</para> + </section> + <section> + <title>mount command fails with “rpc.statd” related error message </title> + <para>If the mount command fails with the following error message: +</para> + <para><errortext>mount.nfs: rpc.statd is not running but is required for remote locking. mount.nfs: Either use '-o nolock' to keep locks local, or start statd. </errortext></para> + <para><errortext>Start rpc.statd </errortext></para> + <para>For NFS clients to mount the NFS server, rpc.statd service must be running on the clients. </para> + <para>Start +rpc.statd service by running the following command: +</para> + <para><command>$ rpc.statd </command></para> + </section> + <section> + <title>mount command takes too long to finish. </title> + <para>Start rpcbind service on the NFS client. +</para> + <para>The problem is that the rpcbind or portmap service is not running on the NFS client. 
The +resolution for this is to start either of these services by running the following command: +</para> + <para><command>$ /etc/init.d/portmap start</command> +</para> + <para>On some distributions where portmap has been replaced by rpcbind, the following command is +required: +</para> + <para><command>$ /etc/init.d/rpcbind start</command></para> + </section> + <section> + <title>NFS server glusterfsd starts but initialization fails with “nfsrpc- service: portmap registration of program failed” error message in the log. </title> + <para>NFS start-up can succeed but the initialization of the NFS service can still fail preventing clients +from accessing the mount points. Such a situation can be confirmed from the following error +messages in the log file: +</para> + <para><screen>[2010-05-26 23:33:47] E [rpcsvc.c:2598:rpcsvc_program_register_portmap] rpc-service: Could notregister with portmap +[2010-05-26 23:33:47] E [rpcsvc.c:2682:rpcsvc_program_register] rpc-service: portmap registration of program failed +[2010-05-26 23:33:47] E [rpcsvc.c:2695:rpcsvc_program_register] rpc-service: Program registration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465 +[2010-05-26 23:33:47] E [nfs.c:125:nfs_init_versions] nfs: Program init failed +[2010-05-26 23:33:47] C [nfs.c:531:notify] nfs: Failed to initialize protocols +[2010-05-26 23:33:49] E [rpcsvc.c:2614:rpcsvc_program_unregister_portmap] rpc-service: Could not unregister with portmap +[2010-05-26 23:33:49] E [rpcsvc.c:2731:rpcsvc_program_unregister] rpc-service: portmap unregistration of program failed +[2010-05-26 23:33:49] E [rpcsvc.c:2744:rpcsvc_program_unregister] rpc-service: Program unregistration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465</screen></para> + <orderedlist> + <listitem> + <para>Start portmap or rpcbind service on the NFS server. 
+</para> + <para>On most Linux distributions, portmap can be started using the following command: +</para> + <para><command>$ /etc/init.d/portmap start </command></para> + <para>On some distributions where portmap has been replaced by rpcbind, run the following command: +</para> + <para><command>$ /etc/init.d/rpcbind start </command></para> + <para>After starting portmap or rpcbind, gluster NFS server needs to be restarted. +</para> + </listitem> + <listitem> + <para>Stop another NFS server running on the same machine. +</para> + <para>Such an error is also seen when there is another NFS server running on the same machine but it is +not the Gluster NFS server. On Linux systems, this could be the kernel NFS server. Resolution +involves stopping the other NFS server or not running the Gluster NFS server on the machine. +Before stopping the kernel NFS server, ensure that no critical service depends on access to that +NFS server's exports. +</para> + <para>On Linux, kernel NFS servers can be stopped by using either of the following commands +depending on the distribution in use: +</para> + <para><command>$ /etc/init.d/nfs-kernel-server stop</command> +</para> + <para><command>$ /etc/init.d/nfs stop</command></para> + </listitem> + <listitem> + <para>Restart Gluster NFS server. +</para> + </listitem> + </orderedlist> + </section> + <section> + <title>mount command fails with NFS server failed error. </title> + <para>mount command fails with following error +</para> + <para><emphasis role="italic">mount: mount to NFS server '10.1.10.11' failed: timed out (retrying).</emphasis></para> + <para>Perform one of the following to resolve this issue: +</para> + <orderedlist> + <listitem> + <para>Disable name lookup requests from NFS server to a DNS server. +</para> + <para>The NFS server attempts to authenticate NFS clients by performing a reverse DNS lookup to +match hostnames in the volume file with the client IP addresses. 
There can be a situation where +the NFS server either is not able to connect to the DNS server or the DNS server is taking too long +to respond to the DNS request. These delays can result in delayed replies from the NFS server to the +NFS client resulting in the timeout error seen above. +</para> + <para>NFS server provides a work-around that disables DNS requests, instead relying only on the client +IP addresses for authentication. The following option can be added for successful mounting in +such situations: +</para> + <para><command>option rpc-auth.addr.namelookup off </command></para> + <para><note> + <para>Remember that disabling name lookup forces the NFS server to authenticate clients using only IP +addresses and if the authentication rules in the volume file use hostnames, those authentication +rules will fail and disallow mounting for those clients. +</para> + </note></para> + <para>or</para> + </listitem> + <listitem> + <para>NFS version used by the NFS client is other than version 3. +</para> + <para>Gluster NFS server supports version 3 of NFS protocol. In recent Linux kernels, the default NFS +version has been changed from 3 to 4. It is possible that the client machine is unable to connect +to the Gluster NFS server because it is using version 4 messages which are not understood by +Gluster NFS server. The timeout can be resolved by forcing the NFS client to use version 3. The +<emphasis role="bold">vers</emphasis> option to mount command is used for this purpose: +</para> + <para><command>$ mount <replaceable>nfsserver</replaceable><replaceable>:export</replaceable> -o vers=3 <replaceable>mount-point</replaceable></command> +</para> + </listitem> + </orderedlist> + </section> + <section> + <title>showmount fails with clnt_create: RPC: Unable to receive </title> + <para>Check your firewall setting to open port 111 for portmap requests/replies and Gluster NFS +server requests/replies. 
Gluster NFS server operates over the following port numbers: 38465, +38466, and 38467. +</para> + <para>For more information, see <xref linkend="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Native-RPM"/>. +</para> + </section> + <section> + <title>Application fails with "Invalid argument" or "Value too large for defined data type" error. </title> + <para>These two errors generally happen for 32-bit nfs clients or applications that do not support 64-bit +inode numbers or large files. +Use the following option from the CLI to make Gluster NFS return 32-bit inode numbers instead: +nfs.enable-ino32 <on|off> +</para> + <para>Applications that will benefit are those that were either: +</para> + <itemizedlist> + <listitem> + <para>built 32-bit and run on 32-bit machines such that they do not support large files by default</para> + </listitem> + <listitem> + <para>built 32-bit on 64-bit systems +</para> + </listitem> + </itemizedlist> + <para>This option is disabled by default so NFS returns 64-bit inode numbers by default. +</para> + <para>Applications which can be rebuilt from source are recommended to rebuild using the following +flag with gcc:</para> + <para><command> -D_FILE_OFFSET_BITS=64</command> +</para> + </section> + </section> + <section> + <title>Troubleshooting File Locks</title> + <para>In GlusterFS 3.3 you can use <command>statedump</command> command to list the locks held on files. The statedump output also provides information on each lock with its range, basename, PID of the application holding the lock, and so on. You can analyze the output to know about the locks whose owner/application is no longer running or interested in that lock. 
After ensuring that no application is using the file, you can clear the lock using the following <command>clear-locks</command> command:</para> + <para><command># <command>gluster volume clear-locks <replaceable>VOLNAME path</replaceable> kind {blocked | granted | all}{inode [range] | entry [basename] | posix [range]}</command></command></para> + <para>For more information on performing <command>statedump</command>, see <xref linkend="sect-Administration_Guide-Monitor_Workload-Performing_Statedump"/>.</para> + <para><emphasis role="bold">To identify locked files and clear locks</emphasis></para> + <orderedlist> + <listitem> + <para>Perform statedump on the volume to view the files that are locked using the following command:</para> + <para> <command># gluster volume statedump <replaceable>VOLNAME</replaceable> inode</command></para> + <para>For example, to display statedump of test-volume:</para> + <para><programlisting># gluster volume statedump test-volume +Volume statedump successful</programlisting></para> + <para>The statedump files are created on the brick servers in the <filename>/tmp</filename> directory or in the directory set using the <command>server.statedump-path</command> volume option. The naming convention of the dump file is <filename>&lt;brick-path&gt;.&lt;brick-pid&gt;.dump</filename>.</para> + <para>The following are the sample contents of the statedump file. It indicates that GlusterFS has entered into a state where there is an entry lock (entrylk) and an inode lock (inodelk). Ensure that those are stale locks and no resources own them. 
</para> + <para><screen>[xlator.features.locks.vol-locks.inode] +path=/ +mandatory=0 +entrylk-count=1 +lock-dump.domain.domain=vol-replicate-0 +xlator.feature.locks.lock-dump.domain.entrylk.entrylk[0](ACTIVE)=type=ENTRYLK_WRLCK on basename=file1, pid = 714782904, owner=ffffff2a3c7f0000, transport=0x20e0670, , granted at Mon Feb 27 16:01:01 2012 + +conn.2.bound_xl./gfs/brick1.hashsize=14057 +conn.2.bound_xl./gfs/brick1.name=/gfs/brick1/inode +conn.2.bound_xl./gfs/brick1.lru_limit=16384 +conn.2.bound_xl./gfs/brick1.active_size=2 +conn.2.bound_xl./gfs/brick1.lru_size=0 +conn.2.bound_xl./gfs/brick1.purge_size=0 + +[conn.2.bound_xl./gfs/brick1.active.1] +gfid=538a3d4a-01b0-4d03-9dc9-843cd8704d07 +nlookup=1 +ref=2 +ia_type=1 +[xlator.features.locks.vol-locks.inode] +path=/file1 +mandatory=0 +inodelk-count=1 +lock-dump.domain.domain=vol-replicate-0 +inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=0, len=0, pid = 714787072, owner=00ffff2a3c7f0000, transport=0x20e0670, , granted at Mon Feb 27 16:01:01 2012</screen></para> + </listitem> + <listitem> + <para>Clear the lock using the following command:</para> + <para><command># <command>gluster volume clear-locks <replaceable>VOLNAME path</replaceable> kind granted entry basename</command></command></para> + <para>For example, to clear the entry lock on <filename>file1</filename> of test-volume: +</para> + <para><screen># gluster volume clear-locks test-volume / kind granted entry file1 +Volume clear-locks successful +vol-locks: entry blocked locks=0 granted locks=1</screen></para> + </listitem> + <listitem> + <para>Clear the inode lock using the following command:</para> + <para><command># <command>gluster volume clear-locks <replaceable>VOLNAME path</replaceable> kind granted inode range </command></command></para> + <para>For example, to clear the inode lock on <filename>file1</filename> of test-volume: +</para> + <para><screen># gluster volume clear-locks test-volume /file1 kind granted inode 0,0-0 +Volume 
clear-locks successful +vol-locks: inode blocked locks=0 granted locks=1</screen></para> + <para>You can perform statedump on test-volume again to verify that the above inode and entry locks are cleared.</para> + </listitem> + </orderedlist> + </section> +</chapter> diff --git a/doc/legacy/docbook/gfs_introduction.xml b/doc/legacy/docbook/gfs_introduction.xml new file mode 100644 index 00000000000..5fd88730556 --- /dev/null +++ b/doc/legacy/docbook/gfs_introduction.xml @@ -0,0 +1,54 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []> +<chapter> + <title>Introducing Gluster File System</title> + <para>GlusterFS is an open source, clustered file system capable of scaling to several petabytes and handling thousands of clients. GlusterFS can be flexibly combined with commodity physical, virtual, and cloud resources to deliver highly available and performant enterprise storage at a fraction of the cost of traditional solutions.</para> + <para>GlusterFS clusters together storage building blocks over Infiniband RDMA and/or TCP/IP interconnect, aggregating disk and memory resources and managing data in a single global namespace. GlusterFS is based on a stackable user space design, delivering exceptional performance for diverse workloads. +</para> + <figure> + <title>Virtualized Cloud Environments</title> + <mediaobject> + <textobject> + <phrase>Virtualized Cloud Environments</phrase> + </textobject> + <imageobject> + <imagedata align="center" fileref="images/640px-GlusterFS_3.2_Architecture.png"/> + </imageobject> + </mediaobject> + </figure> + <para>GlusterFS is designed for today's high-performance, virtualized cloud environments. Unlike traditional data centers, cloud environments require multi-tenancy along with the ability to grow or shrink resources on demand. 
Enterprises can scale capacity, performance, and availability on demand, with no vendor lock-in, across on-premise, public cloud, and hybrid environments. </para> + <para>GlusterFS is in production at thousands of enterprises spanning media, healthcare, government, education, web 2.0, and financial services. The following table lists the commercial offerings and their documentation locations: +</para> + <informaltable frame="all"> + <tgroup cols="2"> + <colspec colname="c1" colwidth="16%"/> + <colspec colname="c2" colwidth="84%"/> + <thead> + <row> + <entry>Product</entry> + <entry>Documentation Location</entry> + </row> + </thead> + <tbody> + <row> + <entry>Red Hat Storage Software Appliance</entry> + <entry> + <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Storage_Software_Appliance/index.html"/> + </entry> + </row> + <row> + <entry>Red Hat Virtual Storage Appliance</entry> + <entry> + <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Virtual_Storage_Appliance/index.html"/> + </entry> + </row> + <row> + <entry>Red Hat Storage </entry> + <entry> + <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Storage/index.html"/> + </entry> + </row> + </tbody> + </tgroup> + </informaltable> +</chapter> diff --git a/doc/legacy/docbook/glossary.xml b/doc/legacy/docbook/glossary.xml new file mode 100644 index 00000000000..a8544b8cd38 --- /dev/null +++ b/doc/legacy/docbook/glossary.xml @@ -0,0 +1,126 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []> +<chapter> + <title>Glossary</title> + <glosslist> + <glossentry> + <glossterm>Brick</glossterm> + <glossdef> + <para>A Brick is the GlusterFS basic unit of storage, represented by an export directory on a server in the trusted storage pool. 
A Brick is expressed by combining a server with an export directory in the following format:</para> + <para><code>SERVER:EXPORT</code></para> + <para>For example:</para> + <para><filename>myhostname:/exports/myexportdir/</filename></para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>Cluster</glossterm> + <glossdef> + <para>A cluster is a group of linked computers, working together closely thus in many respects forming a single computer.</para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>Distributed File System</glossterm> + <glossdef> + <para>A file system that allows multiple clients to concurrently access data over a computer network.</para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>Filesystem</glossterm> + <glossdef> + <para>A method of storing and organizing computer files and their data. Essentially, it organizes these files into a database for the storage, organization, manipulation, and retrieval by the computer's operating system.</para> + <para>Source: <ulink url="http://en.wikipedia.org/wiki/Filesystem">Wikipedia</ulink></para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>FUSE</glossterm> + <glossdef> + <para>Filesystem in Userspace (<acronym>FUSE</acronym>) is a loadable kernel module for Unix-like computer operating systems that lets non-privileged users create their own file systems without editing kernel code. 
This is achieved by running file system code in user space while the <acronym>FUSE</acronym> module provides only a "bridge" to the actual kernel interfaces.</para> + <para>Source: <ulink url="http://en.wikipedia.org/wiki/Filesystem_in_Userspace">Wikipedia</ulink></para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>Geo-Replication</glossterm> + <glossdef> + <para>Geo-replication provides a continuous, asynchronous, and incremental replication service from one site to another over Local Area Networks (<acronym>LAN</acronym>), Wide Area Networks (<acronym>WAN</acronym>), and across the Internet.</para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>glusterd</glossterm> + <glossdef> + <para>The Gluster management daemon that needs to run on all servers in the trusted storage pool.</para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>Metadata</glossterm> + <glossdef> + <para>Metadata is data providing information about one or more other pieces of data.</para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>Namespace</glossterm> + <glossdef> + <para>Namespace is an abstract container or environment created to hold a logical grouping of unique identifiers or symbols. Each Gluster volume exposes a single namespace as a POSIX mount point that contains every file in the cluster.</para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>Open Source</glossterm> + <glossdef> + <para>Open source describes practices in production and development that promote access to the end product's source materials. 
Some consider open source a philosophy, others consider it a pragmatic methodology.</para> + <para>Before the term open source became widely adopted, developers and producers used a variety of phrases to describe the concept; open source gained hold with the rise of the Internet, and the attendant need for massive retooling of the computing source code.</para> + <para>Opening the source code enabled a self-enhancing diversity of production models, communication paths, and interactive communities. Subsequently, a new, three-word phrase "open source software" was born to describe the environment that the new copyright, licensing, domain, and consumer issues created.</para> + <para>Source: <ulink url="http://en.wikipedia.org/wiki/Open_source">Wikipedia</ulink></para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>Petabyte</glossterm> + <glossdef> + <para>A petabyte (derived from the SI prefix peta-) is a unit of information equal to one quadrillion (short scale) bytes, or 1000 terabytes. The unit symbol for the petabyte is PB. The prefix peta- (P) indicates a power of 1000:</para> + <para>1 PB = 1,000,000,000,000,000 B = 1000<superscript>5</superscript> B = 10<superscript>15</superscript> B.</para> + <para>The term "pebibyte" (<acronym>PiB</acronym>), using a binary prefix, is used for the corresponding power of 1024.</para> + <para>Source: <ulink url="http://en.wikipedia.org/wiki/Petabyte">Wikipedia</ulink></para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>POSIX</glossterm> + <glossdef> + <para>Portable Operating System Interface (for Unix) is the name of a family of related standards specified by the IEEE to define the application programming interface (<acronym>API</acronym>), along with shell and utilities interfaces for software compatible with variants of the Unix operating system. 
Gluster exports a fully <acronym>POSIX</acronym>-compliant file system.</para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>RAID</glossterm> + <glossdef> + <para>Redundant Array of Inexpensive Disks (<acronym>RAID</acronym>) is a technology that provides increased storage reliability through redundancy, combining multiple low-cost, less-reliable disk drive components into a logical unit where all drives in the array are interdependent.</para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>RRDNS</glossterm> + <glossdef> + <para>Round Robin Domain Name Service (<acronym>RRDNS</acronym>) is a method to distribute load across application servers. <acronym>RRDNS</acronym> is implemented by creating multiple A records with the same name and different IP addresses in the zone file of a DNS server.</para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>Trusted Storage Pool</glossterm> + <glossdef> + <para>A storage pool is a trusted network of storage servers. When you start the first server, the storage pool consists of that server alone.</para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>Userspace</glossterm> + <glossdef> + <para>Applications running in user space don’t directly interact with hardware, instead using the kernel to moderate access. Userspace applications are generally more portable than applications in kernel space. Gluster is a user space application.</para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>Volfile</glossterm> + <glossdef> + <para>Volfile is a configuration file used by the glusterfs process. The volfile is usually located at <filename>/var/lib/glusterd/vols/VOLNAME</filename>.</para> + </glossdef> + </glossentry> + <glossentry> + <glossterm>Volume</glossterm> + <glossdef> + <para>A volume is a logical collection of bricks. 
Most of the gluster management operations happen on the volume.</para> + </glossdef> + </glossentry> + </glosslist> +</chapter> diff --git a/doc/legacy/docbook/publican.cfg b/doc/legacy/docbook/publican.cfg new file mode 100644 index 00000000000..e42fa1b3dc8 --- /dev/null +++ b/doc/legacy/docbook/publican.cfg @@ -0,0 +1,12 @@ +# Config::Simple 4.59 +# Thu Apr 5 11:09:15 2012 + +xml_lang: "en-US" +type: Book +brand: Gluster_Brand +prod_url: http://www.gluster.org +doc_url: http://www.gluster.com/community/documentation/index.php/Main_Page +condition: gfs +show_remarks: 1 + + diff --git a/doc/legacy/fdl.texi b/doc/legacy/fdl.texi new file mode 100644 index 00000000000..e33c687cdfb --- /dev/null +++ b/doc/legacy/fdl.texi @@ -0,0 +1,454 @@ + +@c @node GNU Free Documentation License +@c @appendixsec GNU Free Documentation License + +@cindex FDL, GNU Free Documentation License +@center Version 1.2, November 2002 + +@display +Copyright @copyright{} 2000,2001,2002 Free Software Foundation, Inc. +59 Temple Place, Suite 330, Boston, MA 02111-1307, USA + +Everyone is permitted to copy and distribute verbatim copies +of this license document, but changing it is not allowed. +@end display + +@enumerate 0 +@item +PREAMBLE + +The purpose of this License is to make a manual, textbook, or other +functional and useful document @dfn{free} in the sense of freedom: to +assure everyone the effective freedom to copy and redistribute it, +with or without modifying it, either commercially or noncommercially. +Secondarily, this License preserves for the author and publisher a way +to get credit for their work, while not being considered responsible +for modifications made by others. + +This License is a kind of ``copyleft'', which means that derivative +works of the document must themselves be free in the same sense. It +complements the GNU General Public License, which is a copyleft +license designed for free software. 
+ +We have designed this License in order to use it for manuals for free +software, because free software needs free documentation: a free +program should come with manuals providing the same freedoms that the +software does. But this License is not limited to software manuals; +it can be used for any textual work, regardless of subject matter or +whether it is published as a printed book. We recommend this License +principally for works whose purpose is instruction or reference. + +@item +APPLICABILITY AND DEFINITIONS + +This License applies to any manual or other work, in any medium, that +contains a notice placed by the copyright holder saying it can be +distributed under the terms of this License. Such a notice grants a +world-wide, royalty-free license, unlimited in duration, to use that +work under the conditions stated herein. The ``Document'', below, +refers to any such manual or work. Any member of the public is a +licensee, and is addressed as ``you''. You accept the license if you +copy, modify or distribute the work in a way requiring permission +under copyright law. + +A ``Modified Version'' of the Document means any work containing the +Document or a portion of it, either copied verbatim, or with +modifications and/or translated into another language. + +A ``Secondary Section'' is a named appendix or a front-matter section +of the Document that deals exclusively with the relationship of the +publishers or authors of the Document to the Document's overall +subject (or to related matters) and contains nothing that could fall +directly within that overall subject. (Thus, if the Document is in +part a textbook of mathematics, a Secondary Section may not explain +any mathematics.) The relationship could be a matter of historical +connection with the subject or with related matters, or of legal, +commercial, philosophical, ethical or political position regarding +them. 
+ +The ``Invariant Sections'' are certain Secondary Sections whose titles +are designated, as being those of Invariant Sections, in the notice +that says that the Document is released under this License. If a +section does not fit the above definition of Secondary then it is not +allowed to be designated as Invariant. The Document may contain zero +Invariant Sections. If the Document does not identify any Invariant +Sections then there are none. + +The ``Cover Texts'' are certain short passages of text that are listed, +as Front-Cover Texts or Back-Cover Texts, in the notice that says that +the Document is released under this License. A Front-Cover Text may +be at most 5 words, and a Back-Cover Text may be at most 25 words. + +A ``Transparent'' copy of the Document means a machine-readable copy, +represented in a format whose specification is available to the +general public, that is suitable for revising the document +straightforwardly with generic text editors or (for images composed of +pixels) generic paint programs or (for drawings) some widely available +drawing editor, and that is suitable for input to text formatters or +for automatic translation to a variety of formats suitable for input +to text formatters. A copy made in an otherwise Transparent file +format whose markup, or absence of markup, has been arranged to thwart +or discourage subsequent modification by readers is not Transparent. +An image format is not Transparent if used for any substantial amount +of text. A copy that is not ``Transparent'' is called ``Opaque''. + +Examples of suitable formats for Transparent copies include plain +@sc{ascii} without markup, Texinfo input format, La@TeX{} input +format, @acronym{SGML} or @acronym{XML} using a publicly available +@acronym{DTD}, and standard-conforming simple @acronym{HTML}, +PostScript or @acronym{PDF} designed for human modification. Examples +of transparent image formats include @acronym{PNG}, @acronym{XCF} and +@acronym{JPG}. 
Opaque formats include proprietary formats that can be +read and edited only by proprietary word processors, @acronym{SGML} or +@acronym{XML} for which the @acronym{DTD} and/or processing tools are +not generally available, and the machine-generated @acronym{HTML}, +PostScript or @acronym{PDF} produced by some word processors for +output purposes only. + +The ``Title Page'' means, for a printed book, the title page itself, +plus such following pages as are needed to hold, legibly, the material +this License requires to appear in the title page. For works in +formats which do not have any title page as such, ``Title Page'' means +the text near the most prominent appearance of the work's title, +preceding the beginning of the body of the text. + +A section ``Entitled XYZ'' means a named subunit of the Document whose +title either is precisely XYZ or contains XYZ in parentheses following +text that translates XYZ in another language. (Here XYZ stands for a +specific section name mentioned below, such as ``Acknowledgements'', +``Dedications'', ``Endorsements'', or ``History''.) To ``Preserve the Title'' +of such a section when you modify the Document means that it remains a +section ``Entitled XYZ'' according to this definition. + +The Document may include Warranty Disclaimers next to the notice which +states that this License applies to the Document. These Warranty +Disclaimers are considered to be included by reference in this +License, but only as regards disclaiming warranties: any other +implication that these Warranty Disclaimers may have is void and has +no effect on the meaning of this License. + +@item +VERBATIM COPYING + +You may copy and distribute the Document in any medium, either +commercially or noncommercially, provided that this License, the +copyright notices, and the license notice saying this License applies +to the Document are reproduced in all copies, and that you add no other +conditions whatsoever to those of this License. 
You may not use +technical measures to obstruct or control the reading or further +copying of the copies you make or distribute. However, you may accept +compensation in exchange for copies. If you distribute a large enough +number of copies you must also follow the conditions in section 3. + +You may also lend copies, under the same conditions stated above, and +you may publicly display copies. + +@item +COPYING IN QUANTITY + +If you publish printed copies (or copies in media that commonly have +printed covers) of the Document, numbering more than 100, and the +Document's license notice requires Cover Texts, you must enclose the +copies in covers that carry, clearly and legibly, all these Cover +Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on +the back cover. Both covers must also clearly and legibly identify +you as the publisher of these copies. The front cover must present +the full title with all words of the title equally prominent and +visible. You may add other material on the covers in addition. +Copying with changes limited to the covers, as long as they preserve +the title of the Document and satisfy these conditions, can be treated +as verbatim copying in other respects. + +If the required texts for either cover are too voluminous to fit +legibly, you should put the first ones listed (as many as fit +reasonably) on the actual cover, and continue the rest onto adjacent +pages. + +If you publish or distribute Opaque copies of the Document numbering +more than 100, you must either include a machine-readable Transparent +copy along with each Opaque copy, or state in or with each Opaque copy +a computer-network location from which the general network-using +public has access to download using public-standard network protocols +a complete Transparent copy of the Document, free of added material. 
+If you use the latter option, you must take reasonably prudent steps, +when you begin distribution of Opaque copies in quantity, to ensure +that this Transparent copy will remain thus accessible at the stated +location until at least one year after the last time you distribute an +Opaque copy (directly or through your agents or retailers) of that +edition to the public. + +It is requested, but not required, that you contact the authors of the +Document well before redistributing any large number of copies, to give +them a chance to provide you with an updated version of the Document. + +@item +MODIFICATIONS + +You may copy and distribute a Modified Version of the Document under +the conditions of sections 2 and 3 above, provided that you release +the Modified Version under precisely this License, with the Modified +Version filling the role of the Document, thus licensing distribution +and modification of the Modified Version to whoever possesses a copy +of it. In addition, you must do these things in the Modified Version: + +@enumerate A +@item +Use in the Title Page (and on the covers, if any) a title distinct +from that of the Document, and from those of previous versions +(which should, if there were any, be listed in the History section +of the Document). You may use the same title as a previous version +if the original publisher of that version gives permission. + +@item +List on the Title Page, as authors, one or more persons or entities +responsible for authorship of the modifications in the Modified +Version, together with at least five of the principal authors of the +Document (all of its principal authors, if it has fewer than five), +unless they release you from this requirement. + +@item +State on the Title page the name of the publisher of the +Modified Version, as the publisher. + +@item +Preserve all the copyright notices of the Document. + +@item +Add an appropriate copyright notice for your modifications +adjacent to the other copyright notices. 
+ +@item +Include, immediately after the copyright notices, a license notice +giving the public permission to use the Modified Version under the +terms of this License, in the form shown in the Addendum below. + +@item +Preserve in that license notice the full lists of Invariant Sections +and required Cover Texts given in the Document's license notice. + +@item +Include an unaltered copy of this License. + +@item +Preserve the section Entitled ``History'', Preserve its Title, and add +to it an item stating at least the title, year, new authors, and +publisher of the Modified Version as given on the Title Page. If +there is no section Entitled ``History'' in the Document, create one +stating the title, year, authors, and publisher of the Document as +given on its Title Page, then add an item describing the Modified +Version as stated in the previous sentence. + +@item +Preserve the network location, if any, given in the Document for +public access to a Transparent copy of the Document, and likewise +the network locations given in the Document for previous versions +it was based on. These may be placed in the ``History'' section. +You may omit a network location for a work that was published at +least four years before the Document itself, or if the original +publisher of the version it refers to gives permission. + +@item +For any section Entitled ``Acknowledgements'' or ``Dedications'', Preserve +the Title of the section, and preserve in the section all the +substance and tone of each of the contributor acknowledgements and/or +dedications given therein. + +@item +Preserve all the Invariant Sections of the Document, +unaltered in their text and in their titles. Section numbers +or the equivalent are not considered part of the section titles. + +@item +Delete any section Entitled ``Endorsements''. Such a section +may not be included in the Modified Version. 
+ +@item +Do not retitle any existing section to be Entitled ``Endorsements'' or +to conflict in title with any Invariant Section. + +@item +Preserve any Warranty Disclaimers. +@end enumerate + +If the Modified Version includes new front-matter sections or +appendices that qualify as Secondary Sections and contain no material +copied from the Document, you may at your option designate some or all +of these sections as invariant. To do this, add their titles to the +list of Invariant Sections in the Modified Version's license notice. +These titles must be distinct from any other section titles. + +You may add a section Entitled ``Endorsements'', provided it contains +nothing but endorsements of your Modified Version by various +parties---for example, statements of peer review or that the text has +been approved by an organization as the authoritative definition of a +standard. + +You may add a passage of up to five words as a Front-Cover Text, and a +passage of up to 25 words as a Back-Cover Text, to the end of the list +of Cover Texts in the Modified Version. Only one passage of +Front-Cover Text and one of Back-Cover Text may be added by (or +through arrangements made by) any one entity. If the Document already +includes a cover text for the same cover, previously added by you or +by arrangement made by the same entity you are acting on behalf of, +you may not add another; but you may replace the old one, on explicit +permission from the previous publisher that added the old one. + +The author(s) and publisher(s) of the Document do not by this License +give permission to use their names for publicity for or to assert or +imply endorsement of any Modified Version. 
+ +@item +COMBINING DOCUMENTS + +You may combine the Document with other documents released under this +License, under the terms defined in section 4 above for modified +versions, provided that you include in the combination all of the +Invariant Sections of all of the original documents, unmodified, and +list them all as Invariant Sections of your combined work in its +license notice, and that you preserve all their Warranty Disclaimers. + +The combined work need only contain one copy of this License, and +multiple identical Invariant Sections may be replaced with a single +copy. If there are multiple Invariant Sections with the same name but +different contents, make the title of each such section unique by +adding at the end of it, in parentheses, the name of the original +author or publisher of that section if known, or else a unique number. +Make the same adjustment to the section titles in the list of +Invariant Sections in the license notice of the combined work. + +In the combination, you must combine any sections Entitled ``History'' +in the various original documents, forming one section Entitled +``History''; likewise combine any sections Entitled ``Acknowledgements'', +and any sections Entitled ``Dedications''. You must delete all +sections Entitled ``Endorsements.'' + +@item +COLLECTIONS OF DOCUMENTS + +You may make a collection consisting of the Document and other documents +released under this License, and replace the individual copies of this +License in the various documents with a single copy that is included in +the collection, provided that you follow the rules of this License for +verbatim copying of each of the documents in all other respects. + +You may extract a single document from such a collection, and distribute +it individually under this License, provided you insert a copy of this +License into the extracted document, and follow this License in all +other respects regarding verbatim copying of that document. 
+ +@item +AGGREGATION WITH INDEPENDENT WORKS + +A compilation of the Document or its derivatives with other separate +and independent documents or works, in or on a volume of a storage or +distribution medium, is called an ``aggregate'' if the copyright +resulting from the compilation is not used to limit the legal rights +of the compilation's users beyond what the individual works permit. +When the Document is included in an aggregate, this License does not +apply to the other works in the aggregate which are not themselves +derivative works of the Document. + +If the Cover Text requirement of section 3 is applicable to these +copies of the Document, then if the Document is less than one half of +the entire aggregate, the Document's Cover Texts may be placed on +covers that bracket the Document within the aggregate, or the +electronic equivalent of covers if the Document is in electronic form. +Otherwise they must appear on printed covers that bracket the whole +aggregate. + +@item +TRANSLATION + +Translation is considered a kind of modification, so you may +distribute translations of the Document under the terms of section 4. +Replacing Invariant Sections with translations requires special +permission from their copyright holders, but you may include +translations of some or all Invariant Sections in addition to the +original versions of these Invariant Sections. You may include a +translation of this License, and all the license notices in the +Document, and any Warranty Disclaimers, provided that you also include +the original English version of this License and the original versions +of those notices and disclaimers. In case of a disagreement between +the translation and the original version of this License or a notice +or disclaimer, the original version will prevail. 
+ +If a section in the Document is Entitled ``Acknowledgements'', +``Dedications'', or ``History'', the requirement (section 4) to Preserve +its Title (section 1) will typically require changing the actual +title. + +@item +TERMINATION + +You may not copy, modify, sublicense, or distribute the Document except +as expressly provided for under this License. Any other attempt to +copy, modify, sublicense or distribute the Document is void, and will +automatically terminate your rights under this License. However, +parties who have received copies, or rights, from you under this +License will not have their licenses terminated so long as such +parties remain in full compliance. + +@item +FUTURE REVISIONS OF THIS LICENSE + +The Free Software Foundation may publish new, revised versions +of the GNU Free Documentation License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. See +@uref{http://www.gnu.org/copyleft/}. + +Each version of the License is given a distinguishing version number. +If the Document specifies that a particular numbered version of this +License ``or any later version'' applies to it, you have the option of +following the terms and conditions either of that specified version or +of any later version that has been published (not as a draft) by the +Free Software Foundation. If the Document does not specify a version +number of this License, you may choose any version ever published (not +as a draft) by the Free Software Foundation. +@end enumerate + +@page +@c @appendixsubsec ADDENDUM: How to use this License for your +@c documents +@subsection ADDENDUM: How to use this License for your documents + +To use this License in a document you have written, include a copy of +the License in the document and put the following copyright and +license notices just after the title page: + +@smallexample +@group + Copyright (C) @var{year} @var{your name}. 
+ Permission is granted to copy, distribute and/or modify this document + under the terms of the GNU Free Documentation License, Version 1.2 + or any later version published by the Free Software Foundation; + with no Invariant Sections, no Front-Cover Texts, and no Back-Cover + Texts. A copy of the license is included in the section entitled ``GNU + Free Documentation License''. +@end group +@end smallexample + +If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts, +replace the ``with...Texts.'' line with this: + +@smallexample +@group + with the Invariant Sections being @var{list their titles}, with + the Front-Cover Texts being @var{list}, and with the Back-Cover Texts + being @var{list}. +@end group +@end smallexample + +If you have Invariant Sections without Cover Texts, or some other +combination of the three, merge those two alternatives to suit the +situation. + +If your document contains nontrivial examples of program code, we +recommend releasing these examples in parallel under your choice of +free software license, such as the GNU General Public License, +to permit their use in free software. 
+ +@c Local Variables: +@c ispell-local-pdict: "ispell-dict" +@c End: + diff --git a/doc/legacy/fuse.odg b/doc/legacy/fuse.odg Binary files differnew file mode 100644 index 00000000000..61bd103c78b --- /dev/null +++ b/doc/legacy/fuse.odg diff --git a/doc/legacy/fuse.pdf b/doc/legacy/fuse.pdf Binary files differnew file mode 100644 index 00000000000..a7d13faff56 --- /dev/null +++ b/doc/legacy/fuse.pdf diff --git a/doc/legacy/ha.odg b/doc/legacy/ha.odg Binary files differnew file mode 100644 index 00000000000..e4b8b72d08b --- /dev/null +++ b/doc/legacy/ha.odg diff --git a/doc/legacy/ha.pdf b/doc/legacy/ha.pdf Binary files differnew file mode 100644 index 00000000000..e372c0ab03e --- /dev/null +++ b/doc/legacy/ha.pdf diff --git a/doc/legacy/stripe.odg b/doc/legacy/stripe.odg Binary files differnew file mode 100644 index 00000000000..79441bf1452 --- /dev/null +++ b/doc/legacy/stripe.odg diff --git a/doc/legacy/stripe.pdf b/doc/legacy/stripe.pdf Binary files differnew file mode 100644 index 00000000000..b94446feb56 --- /dev/null +++ b/doc/legacy/stripe.pdf diff --git a/doc/legacy/unify.odg b/doc/legacy/unify.odg Binary files differnew file mode 100644 index 00000000000..ccaa9bf16f9 --- /dev/null +++ b/doc/legacy/unify.odg diff --git a/doc/legacy/unify.pdf b/doc/legacy/unify.pdf Binary files differnew file mode 100644 index 00000000000..c22027f66e7 --- /dev/null +++ b/doc/legacy/unify.pdf diff --git a/doc/legacy/user-guide.info b/doc/legacy/user-guide.info new file mode 100644 index 00000000000..2bbadb35107 --- /dev/null +++ b/doc/legacy/user-guide.info @@ -0,0 +1,2697 @@ +This is ../../../doc/user-guide/user-guide.info, produced by makeinfo version 4.13 from ../../../doc/user-guide/user-guide.texi. + +START-INFO-DIR-ENTRY +* GlusterFS: (user-guide). GlusterFS distributed filesystem user guide +END-INFO-DIR-ENTRY + + This is the user manual for GlusterFS 2.0. + + Copyright (c) 2007-2011 Gluster, Inc. 
Permission is granted to +copy, distribute and/or modify this document under the terms of the GNU +Free Documentation License, Version 1.2 or any later version published +by the Free Software Foundation; with no Invariant Sections, no +Front-Cover Texts, and no Back-Cover Texts. A copy of the license is +included in the chapter entitled "GNU Free Documentation License". + + +File: user-guide.info, Node: Top, Next: Acknowledgements, Up: (dir) + +GlusterFS 2.0 User Guide +************************ + +This is the user manual for GlusterFS 2.0. + + Copyright (c) 2007-2011 Gluster, Inc. Permission is granted to +copy, distribute and/or modify this document under the terms of the GNU +Free Documentation License, Version 1.2 or any later version published +by the Free Software Foundation; with no Invariant Sections, no +Front-Cover Texts, and no Back-Cover Texts. A copy of the license is +included in the chapter entitled "GNU Free Documentation License". + +* Menu: + +* Acknowledgements:: +* Introduction:: +* Installation and Invocation:: +* Concepts:: +* Translators:: +* Usage Scenarios:: +* Troubleshooting:: +* GNU Free Documentation Licence:: +* Index:: + + --- The Detailed Node Listing --- + +Installation and Invocation + +* Pre requisites:: +* Getting GlusterFS:: +* Building:: +* Running GlusterFS:: +* A Tutorial Introduction:: + +Running GlusterFS + +* Server:: +* Client:: + +Concepts + +* Filesystems in Userspace:: +* Translator:: +* Volume specification file:: + +Translators + +* Storage Translators:: +* Client and Server Translators:: +* Clustering Translators:: +* Performance Translators:: +* Features Translators:: + +Storage Translators + +* POSIX:: + +Client and Server Translators + +* Transport modules:: +* Client protocol:: +* Server protocol:: + +Clustering Translators + +* Unify:: +* Replicate:: +* Stripe:: + +Performance Translators + +* Read Ahead:: +* Write Behind:: +* IO Threads:: +* IO Cache:: + +Features Translators + +* POSIX Locks:: +* Fixed ID:: + 
+Miscellaneous Translators + +* ROT-13:: +* Trace:: + + +File: user-guide.info, Node: Acknowledgements, Next: Introduction, Prev: Top, Up: Top + +Acknowledgements +**************** + +GlusterFS continues to be a wonderful and enriching experience for all +of us involved. + + GlusterFS development would not have been possible at this pace if +not for our enthusiastic users. People from around the world have +helped us with bug reports, performance numbers, and feature +suggestions. A huge thanks to them all. + + Matthew Paine - for RPMs & general enthu + + Leonardo Rodrigues de Mello - for DEBs + + Julian Perez & Adam D'Auria - for multi-server tutorial + + Paul England - for HA spec + + Brent Nelson - for many bug reports + + Jacques Mattheij - for Europe mirror. + + Patrick Negri - for TCP non-blocking connect. + http://gluster.org/core-team.php (<list-hacking@gluster.com>) + Gluster + + +File: user-guide.info, Node: Introduction, Next: Installation and Invocation, Prev: Acknowledgements, Up: Top + +1 Introduction +************** + +GlusterFS is a distributed filesystem. It works at the file level, not +block level. + + A network filesystem is one which allows us to access remote files. A +distributed filesystem is one that stores data on multiple machines and +makes them all appear to be a part of the same filesystem. + + Need for distributed filesystems + + * Scalability: A distributed filesystem allows us to store more data + than what can be stored on a single machine. + + * Redundancy: We might want to replicate crucial data on to several + machines. + + * Uniform access: One can mount a remote volume (for example your + home directory) from any machine and access the same data. + +1.1 Contacting us +================= + +You can reach us through the mailing list *gluster-devel* +(<gluster-devel@nongnu.org>). + + You can also find many of the developers on IRC, on the `#gluster' +channel on Freenode (<irc.freenode.net>). 
+ + The GlusterFS documentation wiki is also useful: +<http://gluster.org/docs/index.php/GlusterFS> + + For commercial support, you can contact Gluster at: + + 3194 Winding Vista Common + Fremont, CA 94539 + USA. + + Phone: +1 (510) 354 6801 + Toll free: +1 (888) 813 6309 + Fax: +1 (510) 372 0604 + + You can also email us at <support@gluster.com>. + + +File: user-guide.info, Node: Installation and Invocation, Next: Concepts, Prev: Introduction, Up: Top + +2 Installation and Invocation +***************************** + +* Menu: + +* Pre requisites:: +* Getting GlusterFS:: +* Building:: +* Running GlusterFS:: +* A Tutorial Introduction:: + + +File: user-guide.info, Node: Pre requisites, Next: Getting GlusterFS, Up: Installation and Invocation + +2.1 Pre requisites +================== + +Before installing GlusterFS make sure you have the following components +installed. + +2.1.1 FUSE +---------- + +You'll need FUSE version 2.6.0 or higher to use GlusterFS. You can omit +installing FUSE if you want to build _only_ the server. Note that you +won't be able to mount a GlusterFS filesystem on a machine that does +not have FUSE installed. + + FUSE can be downloaded from: <http://fuse.sourceforge.net/> + + To get the best performance from GlusterFS, however, it is +recommended that you use our patched version of FUSE. See Patched FUSE +for details. + +2.1.2 Patched FUSE +------------------ + +The GlusterFS project maintains a patched version of FUSE meant to be +used with GlusterFS. The patches increase GlusterFS performance. It is +recommended that all users use the patched FUSE. + + The patched FUSE tarball can be downloaded from: + + <ftp://ftp.gluster.com/pub/gluster/glusterfs/fuse/> + + The specific changes made to FUSE are: + + * The communication channel size between FUSE kernel module and + GlusterFS has been increased to 1MB, permitting large reads and + writes to be sent in bigger chunks. + + * The kernel's read-ahead boundary has been extended up to 1MB. 
+ + * Block size returned in the `stat()'/`fstat()' calls tuned to 1MB, + to make cp and similar commands perform I/O using that block size. + + * `flock()' locking support has been added (although some rework in + GlusterFS is needed for perfect compliance). + +2.1.3 libibverbs (optional) +--------------------------- + +This is only needed if you want GlusterFS to use InfiniBand as the +interconnect mechanism between server and client. You can get it from: + + <http://www.openfabrics.org/downloads.htm>. + +2.1.4 Bison and Flex +-------------------- + +These should be already installed on most Linux systems. If not, use +your distribution's normal software installation procedures to install +them. Make sure you install the relevant developer packages also. + + +File: user-guide.info, Node: Getting GlusterFS, Next: Building, Prev: Pre requisites, Up: Installation and Invocation + +2.2 Getting GlusterFS +===================== + +There are many ways to get hold of GlusterFS. For a production +deployment, the recommended method is to download the latest release +tarball. Release tarballs are available at: +<http://gluster.org/download.php>. + + If you want the bleeding edge development source, you can get them +from the GNU Arch(1) repository. First you must install GNU Arch +itself. Then register the GlusterFS archive by doing: + + $ tla register-archive http://arch.sv.gnu.org/archives/gluster + + Now you can check out the source itself: + + $ tla get -A gluster@sv.gnu.org glusterfs--mainline--3.0 + + ---------- Footnotes ---------- + + (1) <http://www.gnu.org/software/gnu-arch/> + + +File: user-guide.info, Node: Building, Next: Running GlusterFS, Prev: Getting GlusterFS, Up: Installation and Invocation + +2.3 Building +============ + +You can skip this section if you're installing from RPMs or DEBs. + + GlusterFS uses the Autotools mechanism to build. As such, the +procedure is straight-forward. First, change into the GlusterFS source +directory. 
+ + $ cd glusterfs-<version> + + If you checked out the source from the Arch repository, you'll need +to run `./autogen.sh' first. Note that you'll need to have Autoconf and +Automake installed for this. + + Run `configure'. + + $ ./configure + + The configure script accepts the following options: + +`--disable-ibverbs' + Disable the InfiniBand transport mechanism. + +`--disable-fuse-client' + Disable the FUSE client. + +`--disable-server' + Disable building of the GlusterFS server. + +`--disable-bdb' + Disable building of Berkeley DB based storage translator. + +`--disable-mod_glusterfs' + Disable building of Apache/lighttpd glusterfs plugins. + +`--disable-epoll' + Use poll instead of epoll. + +`--disable-libglusterfsclient' + Disable building of libglusterfsclient + + + Build and install GlusterFS. + + # make install + + The binaries (`glusterfsd' and `glusterfs') will be by default +installed in `/usr/local/sbin/'. Translator, scheduler, and transport +shared libraries will be installed in +`/usr/local/lib/glusterfs/<version>/'. Sample volume specification +files will be in `/usr/local/etc/glusterfs/'. This document itself can +be found in `/usr/local/share/doc/glusterfs/'. If you passed the +`--prefix' argument to the configure script, then replace `/usr/local' +in the preceding paths with the prefix. + + +File: user-guide.info, Node: Running GlusterFS, Next: A Tutorial Introduction, Prev: Building, Up: Installation and Invocation + +2.4 Running GlusterFS +===================== + +* Menu: + +* Server:: +* Client:: + + +File: user-guide.info, Node: Server, Next: Client, Up: Running GlusterFS + +2.4.1 Server +------------ + +The GlusterFS server is necessary to export storage volumes to remote +clients (See *note Server protocol:: for more info). This section +documents the invocation of the GlusterFS server program and all the +command-line options accepted by it. + + Basic Options + +`-f, --volfile=<path>' + Use the volume file as the volume specification. 
+ +`-s, --volfile-server=<hostname>' + Server to get volume file from. This option overrides the --volfile + option. + +`-l, --log-file=<path>' + Specify the path for the log file. + +`-L, --log-level=<level>' + Set the log level for the server. Log level should be one of DEBUG, + WARNING, ERROR, CRITICAL, or NONE. + + Advanced Options + +`--debug' + Run in debug mode. This option sets --no-daemon, --log-level to + DEBUG and --log-file to console. + +`-N, --no-daemon' + Run glusterfsd as a foreground process. + +`-p, --pid-file=<path>' + Path for the PID file. + +`--volfile-id=<key>' + 'key' of the volfile to be fetched from server. + +`--volfile-server-port=<port-number>' + Listening port number of volfile server. + +`--volfile-server-transport=[tcp|ib-verbs]' + Transport type to get volfile from server. [default: `tcp'] + +`--xlator-options=<volume-name.option=value>' + Add/override a translator option for a volume with specified value. + + Miscellaneous Options + +`-?, --help' + Show this help text. + +`--usage' + Display a short usage message. + +`-V, --version' + Show version information. + + +File: user-guide.info, Node: Client, Prev: Server, Up: Running GlusterFS + +2.4.2 Client +------------ + +The GlusterFS client process is necessary to access remote storage +volumes and mount them locally using FUSE. This section documents the +invocation of the client process and all its command-line arguments. + + # glusterfs [options] <mountpoint> + + The `mountpoint' is the directory where you want the GlusterFS +filesystem to appear. Example: + + # glusterfs -f /usr/local/etc/glusterfs-client.vol /mnt + + The command-line options are detailed below. + + Basic Options + +`-f, --volfile=<path>' + Use the volume file as the volume specification. + +`-s, --volfile-server=<hostname>' + Server to get volume file from. This option overrides the --volfile + option. 
+ +`-L, --log-level=<level>' + Set the log level for the client. Log level should be one of DEBUG, + WARNING, ERROR, CRITICAL, or NONE. + + Advanced Options + +`--debug' + Run in debug mode. This option sets --no-daemon, --log-level to + DEBUG and --log-file to console. + +`-N, --no-daemon' + Run `glusterfs' as a foreground process. + +`-p, --pid-file=<path>' + Path for the PID file. + +`--volfile-id=<key>' + 'key' of the volfile to be fetched from server. + +`--volfile-server-port=<port-number>' + Listening port number of volfile server. + +`--volfile-server-transport=[tcp|ib-verbs]' + Transport type to get volfile from server. [default: `tcp'] + +`--xlator-options=<volume-name.option=value>' + Add/override a translator option for a volume with specified value. + +`--volume-name=<volume name>' + Volume name in client spec to use. Defaults to the root volume. + + FUSE Options + +`--attribute-timeout=<n>' + Attribute timeout for inodes in the kernel, in seconds. Defaults + to 1 second. + +`--disable-direct-io-mode' + Disable direct I/O mode in FUSE kernel module. + +`-e, --entry-timeout=<n>' + Entry timeout for directory entries in the kernel, in seconds. + Defaults to 1 second. + + Miscellaneous Options + +`-?, --help' + Show this help information. + +`-V, --version' + Show version information. + + +File: user-guide.info, Node: A Tutorial Introduction, Prev: Running GlusterFS, Up: Installation and Invocation + +2.5 A Tutorial Introduction +=========================== + +This section will show you how to quickly get GlusterFS up and running. +We'll configure GlusterFS as a simple network filesystem, with one +server and one client. In this mode of usage, GlusterFS can serve as a +replacement for NFS. + + We'll make use of two machines; call them _server_ and _client_ (If +you don't want to set up two machines, just run everything that follows +on the same machine). 
In the examples that follow, the shell prompts +will use these names to clarify the machine on which the command is +being run. For example, a command that should be run on the server will +be shown with the prompt: + + [root@server]# + + Our goal is to make a directory on the _server_ (say, `/export') +accessible to the _client_. + + First of all, get GlusterFS installed on both the machines, as +described in the previous sections. Make sure you have the FUSE kernel +module loaded. You can ensure this by running: + + [root@server]# modprobe fuse + + Before we can run the GlusterFS client or server programs, we need +to write two files called _volume specifications_ (equivalently referred +to as _volfiles_). The volfile describes the _translator tree_ on a +node. The next chapter will explain the concepts of `translator' and +`volume specification' in detail. For now, just assume that the volfile +is like an NFS `/etc/exports' file. + + On the server, create a text file somewhere (we'll assume the path +`/tmp/glusterfsd.vol') with the following contents. + + volume colon-o + type storage/posix + option directory /export + end-volume + + volume server + type protocol/server + subvolumes colon-o + option transport-type tcp + option auth.addr.colon-o.allow * + end-volume + + A brief explanation of the file's contents. The first section +defines a storage volume, named "colon-o" (the volume names are +arbitrary), which exports the `/export' directory. The second section +defines options for the translator which will make the storage volume +accessible remotely. It specifies `colon-o' as a subvolume. This +defines the _translator tree_, about which more will be said in the +next chapter. The two options specify that the TCP protocol is to be +used (as opposed to InfiniBand, for example), and that access to the +storage volume is to be provided to clients with any IP address at all. 
+If you wanted to restrict access to this server to only your subnet for +example, you'd specify something like `192.168.1.*' in the second +option line. + + On the client machine, create the following text file (again, we'll +assume the path to be `/tmp/glusterfs-client.vol'). Replace +_server-ip-address_ with the IP address of your server machine. If you +are doing all this on a single machine, use `127.0.0.1'. + + volume client + type protocol/client + option transport-type tcp + option remote-host _server-ip-address_ + option remote-subvolume colon-o + end-volume + + Now we need to start both the server and client programs. To start +the server: + + [root@server]# glusterfsd -f /tmp/glusterfsd.vol + + To start the client: + + [root@client]# glusterfs -f /tmp/glusterfs-client.vol /mnt/glusterfs + + You should now be able to see the files under the server's `/export' +directory in the `/mnt/glusterfs' directory on the client. That's it; +GlusterFS is now working as a network file system. + + +File: user-guide.info, Node: Concepts, Next: Translators, Prev: Installation and Invocation, Up: Top + +3 Concepts +********** + +* Menu: + +* Filesystems in Userspace:: +* Translator:: +* Volume specification file:: + + +File: user-guide.info, Node: Filesystems in Userspace, Next: Translator, Up: Concepts + +3.1 Filesystems in Userspace +============================ + +A filesystem is usually implemented in kernel space. Kernel space +development is much harder than userspace development. FUSE is a kernel +module/library that allows us to write a filesystem completely in +userspace. + + FUSE consists of a kernel module which interacts with the userspace +implementation using a device file `/dev/fuse'. When a process makes a +syscall on a FUSE filesystem, VFS hands the request to the FUSE module, +which writes the request to `/dev/fuse'. The userspace implementation +polls `/dev/fuse', and when a request arrives, processes it and writes +the result back to `/dev/fuse'. 
The kernel then reads from the device +file and returns the result to the user process. + + In case of GlusterFS, the userspace program is the GlusterFS client. +The control flow is shown in the diagram below. The GlusterFS client +services the request by sending it to the server, which in turn hands +it to the local POSIX filesystem. + + + Fig 1. Control flow in GlusterFS + + +File: user-guide.info, Node: Translator, Next: Volume specification file, Prev: Filesystems in Userspace, Up: Concepts + +3.2 Translator +============== + +The _translator_ is the most important concept in GlusterFS. In fact, +GlusterFS is nothing but a collection of translators working together, +forming a translator _tree_. + + The idea of a translator is perhaps best understood using an +analogy. Consider the VFS in the Linux kernel. The VFS abstracts the +various filesystem implementations (such as EXT3, ReiserFS, XFS, etc.) +supported by the kernel. When an application calls the kernel to +perform an operation on a file, the kernel passes the request on to the +appropriate filesystem implementation. + + For example, let's say there are two partitions on a Linux machine: +`/', which is an EXT3 partition, and `/usr', which is a ReiserFS +partition. Now if an application wants to open a file called, say, +`/etc/fstab', then the kernel will internally pass the request to the +EXT3 implementation. If on the other hand, an application wants to +read a file called `/usr/src/linux/CREDITS', then the kernel will call +upon the ReiserFS implementation to do the job. + + The "filesystem implementation" objects are analogous to GlusterFS +translators. A GlusterFS translator implements all the filesystem +operations. Whereas in VFS there is a two-level tree (with the kernel +at the root and all the filesystem implementation as its children), in +GlusterFS there exists a more elaborate tree structure. + + We can now define translators more precisely. 
A GlusterFS translator +is a shared object (`.so') that implements every filesystem call. +GlusterFS translators can be arranged in an arbitrary tree structure +(subject to constraints imposed by the translators). When GlusterFS +receives a filesystem call, it passes it on to the translator at the +root of the translator tree. The root translator may in turn pass it on +to any or all of its children, and so on, until the leaf nodes are +reached. The result of a filesystem call is communicated in the reverse +fashion, from the leaf nodes up to the root node, and then on to the +application. + + So what might a translator tree look like? + + + Fig 2. A sample translator tree + + The diagram depicts three servers and one GlusterFS client. It is +important to note that conceptually, the translator tree spans machine +boundaries. Thus, the client machine in the diagram, `10.0.0.1', can +access the aggregated storage of the filesystems on the server machines +`10.0.0.2', `10.0.0.3', and `10.0.0.4'. The translator diagram will +make more sense once you've read the next chapter and understood the +functions of the various translators. + + +File: user-guide.info, Node: Volume specification file, Prev: Translator, Up: Concepts + +3.3 Volume specification file +============================= + +The volume specification file describes the translator tree for both the +server and client programs. + + A volume specification file is a sequence of volume definitions. +The syntax of a volume definition is explained below: + + *volume* _volume-name_ + *type* _translator-name_ + *option* _option-name_ _option-value_ + ... + *subvolumes* _subvolume1_ _subvolume2_ ... + *end-volume* + + ... + +_volume-name_ + An identifier for the volume. This is just a human-readable name, + and can contain any alphanumeric character. For instance, + "storage-1", "colon-o", or "forty-two". + +_translator-name_ + Name of one of the available translators. Example: + `protocol/client', `cluster/unify'. 
+ +_option-name_ + Name of a valid option for the translator. + +_option-value_ + Value for the option. Everything following the "option" keyword to + the end of the line is considered the value; it is up to the + translator to parse it. + +_subvolume1_, _subvolume2_, ... + Volume names of sub-volumes. The sub-volumes must already have + been defined earlier in the file. + + There are a few rules you must follow when writing a volume +specification file: + + * Everything following a ``#'' is considered a comment and is + ignored. Blank lines are also ignored. + + * All names and keywords are case-sensitive. + + * The order of options inside a volume definition does not matter. + + * An option value may not span multiple lines. + + * If an option is not specified, it will assume its default value. + + * A sub-volume must have already been defined before it can be + referenced. This means you have to write the specification file + "bottom-up", starting from the leaf nodes of the translator tree + and moving up to the root. + + A simple example volume specification file is shown below: + + # This is a comment line + volume client + type protocol/client + option transport-type tcp + option remote-host localhost # Also a comment + option remote-subvolume brick + # The subvolumes line may be absent + end-volume + + volume iot + type performance/io-threads + option thread-count 4 + subvolumes client + end-volume + + volume wb + type performance/write-behind + subvolumes iot + end-volume + + +File: user-guide.info, Node: Translators, Next: Usage Scenarios, Prev: Concepts, Up: Top + +4 Translators +************* + +* Menu: + +* Storage Translators:: +* Client and Server Translators:: +* Clustering Translators:: +* Performance Translators:: +* Features Translators:: +* Miscellaneous Translators:: + + This chapter documents all the available GlusterFS translators in +detail. 
Each translator section will show its name (for example, +`cluster/unify'), briefly describe its purpose and workings, and list +every option accepted by that translator and their meaning. + + +File: user-guide.info, Node: Storage Translators, Next: Client and Server Translators, Up: Translators + +4.1 Storage Translators +======================= + +The storage translators form the "backend" for GlusterFS. Currently, +the only available storage translator is the POSIX translator, which +stores files on a normal POSIX filesystem. A pleasant consequence of +this is that your data will still be accessible if GlusterFS crashes or +cannot be started. + + Other storage backends are planned for the future. One of the +possibilities is an Amazon S3 translator. Amazon S3 is an unlimited +online storage service accessible through a web services API. The S3 +translator will allow you to access the storage as a normal POSIX +filesystem. (1) + +* Menu: + +* POSIX:: +* BDB:: + + ---------- Footnotes ---------- + + (1) Some more discussion about this can be found at: + +http://developer.amazonwebservices.com/connect/message.jspa?messageID=52873 + + +File: user-guide.info, Node: POSIX, Next: BDB, Up: Storage Translators + +4.1.1 POSIX +----------- + + type storage/posix + + The `posix' translator uses a normal POSIX filesystem as its +"backend" to actually store files and directories. This can be any +filesystem that supports extended attributes (EXT3, ReiserFS, XFS, +...). Extended attributes are used by some translators to store +metadata, for example, by the replicate and stripe translators. See +*note Replicate:: and *note Stripe::, respectively for details. + +`directory <path>' + The directory on the local filesystem which is to be used for + storage. 
+ + +File: user-guide.info, Node: BDB, Prev: POSIX, Up: Storage Translators + +4.1.2 BDB +--------- + + type storage/bdb + + The `BDB' translator uses a Berkeley DB database as its "backend" to +actually store files as key-value pair in the database and directories +as regular POSIX directories. Note that BDB does not provide extended +attribute support for regular files. Do not use BDB as storage +translator while using any translator that demands extended attributes +on "backend". + +`directory <path>' + The directory on the local filesystem which is to be used for + storage. + +`mode [cache|persistent] (cache)' + When BDB is run in `cache' mode, recovery of back-end is not + completely guaranteed. `persistent' guarantees that BDB can + recover back-end from Berkeley DB even if GlusterFS crashes. + +`errfile <path>' + The path of the file to be used as `errfile' for Berkeley DB to + report detailed error messages, if any. Note that all the contents + of this file will be written by Berkeley DB, not GlusterFS. + +`logdir <path>' + + +File: user-guide.info, Node: Client and Server Translators, Next: Clustering Translators, Prev: Storage Translators, Up: Translators + +4.2 Client and Server Translators +================================= + +The client and server translator enable GlusterFS to export a +translator tree over the network or access a remote GlusterFS server. +These two translators implement GlusterFS's network protocol. + +* Menu: + +* Transport modules:: +* Client protocol:: +* Server protocol:: + + +File: user-guide.info, Node: Transport modules, Next: Client protocol, Up: Client and Server Translators + +4.2.1 Transport modules +----------------------- + +The client and server translators are capable of using any of the +pluggable transport modules. 
Currently available transport modules are +`tcp', which uses a TCP connection between client and server to +communicate; `ib-sdp', which uses a TCP connection over InfiniBand, and +`ib-verbs', which uses high-speed InfiniBand connections. + + Each transport module comes in two different versions, one to be +used on the server side and the other on the client side. + +4.2.1.1 TCP +........... + +The TCP transport module uses a TCP/IP connection between the server +and the client. + + option transport-type tcp + + The TCP client module accepts the following options: + +`non-blocking-connect [no|off|on|yes] (on)' + Whether to make the connection attempt asynchronous. + +`remote-port <n> (24007)' + Server port to connect to. + +`remote-host <hostname> *' + Hostname or IP address of the server. If the host name resolves to + multiple IP addresses, all of them will be tried in a round-robin + fashion. This feature can be used to implement fail-over. + + The TCP server module accepts the following options: + +`bind-address <address> (0.0.0.0)' + The local interface on which the server should listen to requests. + Default is to listen on all interfaces. + +`listen-port <n> (24007)' + The local port to listen on. + +4.2.1.2 IB-SDP +.............. + + option transport-type ib-sdp + + The kernel implements a socket interface (SDP) for InfiniBand hardware. SDP runs over +ib-verbs. This module accepts the same options as `tcp'. + +4.2.1.3 ibverbs +............... + + option transport-type ib-verbs + + InfiniBand is a scalable switched fabric interconnect mechanism +primarily used in high-performance computing. InfiniBand can deliver +data throughput of the order of 10 Gbit/s, with latencies of 4-5 ms. + + The `ib-verbs' transport accesses the InfiniBand hardware through +the "verbs" API, which is the lowest level of software access possible +and which gives the highest performance. On InfiniBand hardware, it is +always best to use `ib-verbs'. 
Use `ib-sdp' only if you cannot get +`ib-verbs' working for some reason. + + The `ib-verbs' client module accepts the following options: + +`non-blocking-connect [no|off|on|yes] (on)' + Whether to make the connection attempt asynchronous. + +`remote-port <n> (24007)' + Server port to connect to. + +`remote-host <hostname> *' + Hostname or IP address of the server. If the host name resolves to + multiple IP addresses, all of them will be tried in a round-robin + fashion. This feature can be used to implement fail-over. + + The `ib-verbs' server module accepts the following options: + +`bind-address <address> (0.0.0.0)' + The local interface on which the server should listen to requests. + Default is to listen on all interfaces. + +`listen-port <n> (24007)' + The local port to listen on. + + The following options are common to both the client and server +modules: + + If you are familiar with InfiniBand jargon, the mode used by +GlusterFS is "reliable connection-oriented channel transfer". + +`ib-verbs-work-request-send-count <n> (64)' + Length of the send queue in datagrams. [Reason to + increase/decrease?] + +`ib-verbs-work-request-recv-count <n> (64)' + Length of the receive queue in datagrams. [Reason to + increase/decrease?] + +`ib-verbs-work-request-send-size <size> (128KB)' + Size of each datagram that is sent. [Reason to increase/decrease?] + +`ib-verbs-work-request-recv-size <size> (128KB)' + Size of each datagram that is received. [Reason to + increase/decrease?] + +`ib-verbs-port <n> (1)' + Port number for ib-verbs. + +`ib-verbs-mtu [256|512|1024|2048|4096] (2048)' + The Maximum Transmission Unit [Reason to increase/decrease?] + +`ib-verbs-device-name <device-name> (first device in the list)' + InfiniBand device to be used. + + For maximum performance, you should ensure that the send/receive +counts on both the client and server are the same. + + ib-verbs is preferred over ib-sdp. 
+ + +File: user-guide.info, Node: Client protocol, Next: Server protocol, Prev: Transport modules, Up: Client and Server Translators + +4.2.2 Client +------------ + + type protocol/client + + The client translator enables the GlusterFS client to access a +remote server's translator tree. + +`transport-type [tcp,ib-sdp,ib-verbs] (tcp)' + The transport type to use. You should use the client versions of + all the transport modules (`tcp', `ib-sdp', `ib-verbs'). + +`remote-subvolume <volume_name> *' + The name of the volume on the remote host to attach to. Note that + this is _not_ the name of the `protocol/server' volume on the + server. It should be any volume under the server. + +`transport-timeout <n> (120 seconds)' + Inactivity timeout. If a reply is expected and no activity takes + place on the connection within this time, the transport connection + will be broken, and a new connection will be attempted. + + +File: user-guide.info, Node: Server protocol, Prev: Client protocol, Up: Client and Server Translators + +4.2.3 Server +------------ + + type protocol/server + + The server translator exports a translator tree and makes it +accessible to remote GlusterFS clients. + +`client-volume-filename <path> (<CONFDIR>/glusterfs-client.vol)' + The volume specification file to use for the client. This is the + file the client will receive when it is invoked with the + `--server' option (*note Client::). + +`transport-type [tcp,ib-verbs,ib-sdp] (tcp)' + The transport to use. You should use the server versions of all + the transport modules (`tcp', `ib-sdp', `ib-verbs'). + +`auth.addr.<volume name>.allow <IP address wildcard pattern>' + IP addresses of the clients that are allowed to attach to the + specified volume. This can be a wildcard. For example, a wildcard + of the form `192.168.*.*' allows any host in the `192.168.x.x' + subnet to connect to the server. 
+ + + +File: user-guide.info, Node: Clustering Translators, Next: Performance Translators, Prev: Client and Server Translators, Up: Translators + +4.3 Clustering Translators +========================== + +The clustering translators are the most important GlusterFS +translators, since it is these that make GlusterFS a cluster +filesystem. These translators together enable GlusterFS to access an +arbitrarily large amount of storage, and provide RAID-like redundancy +and distribution over the entire cluster. + + There are three clustering translators: *unify*, *replicate*, and +*stripe*. The unify translator aggregates storage from many server +nodes. The replicate translator provides file replication. The stripe +translator allows a file to be spread across many server nodes. The +following sections look at each of these translators in detail. + +* Menu: + +* Unify:: +* Replicate:: +* Stripe:: + + +File: user-guide.info, Node: Unify, Next: Replicate, Up: Clustering Translators + +4.3.1 Unify +----------- + + type cluster/unify + + The unify translator presents a `unified' view of all its +sub-volumes. That is, it makes the union of all its sub-volumes appear +as a single volume. It is the unify translator that gives GlusterFS the +ability to access an arbitrarily large amount of storage. + + For unify to work correctly, certain invariants need to be +maintained across the entire network. These are: + + * The directory structure of all the sub-volumes must be identical. + + * A particular file can exist on only one of the sub-volumes. + Phrasing it in another way, a pathname such as + `/home/calvin/homework.txt') is unique across the entire cluster. + + + +Looking at the second requirement, you might wonder how one can +accomplish storing redundant copies of a file, if no file can exist +multiple times. To answer, we must remember that these invariants are +from _unify's perspective_. 
A translator such as replicate at a lower +level in the translator tree than unify may subvert this picture. + + The first invariant might seem quite tedious to ensure. We shall see +later that this is not so, since unify's _self-heal_ mechanism takes +care of maintaining it. + + The second invariant implies that unify needs some way to decide +which file goes where. Unify makes use of _scheduler_ modules for this +purpose. + + When a file needs to be created, unify's scheduler decides upon the +sub-volume to be used to store the file. There are many schedulers +available, each using a different algorithm and suitable for different +purposes. + + The various schedulers are described in detail in the sections that +follow. + +4.3.1.1 ALU +........... + + option scheduler alu + + ALU stands for "Adaptive Least Usage". It is the most advanced +scheduler available in GlusterFS. It balances the load across volumes +taking several factors in account. It adapts itself to changing I/O +patterns according to its configuration. When properly configured, it +can eliminate the need for regular tuning of the filesystem to keep +volume load nicely balanced. + + The ALU scheduler is composed of multiple least-usage +sub-schedulers. Each sub-scheduler keeps track of a certain type of +load, for each of the sub-volumes, getting statistics from the +sub-volumes themselves. The sub-schedulers are these: + + * disk-usage: The used and free disk space on the volume. + + * read-usage: The amount of reading done from this volume. + + * write-usage: The amount of writing done to this volume. + + * open-files-usage: The number of files currently open from this + volume. + + * disk-speed-usage: The speed at which the disks are spinning. This + is a constant value and therefore not very useful. + + The ALU scheduler needs to know which of these sub-schedulers to use, +and in which order to evaluate them. This is done through the `option +alu.order' configuration directive. 
+ + Each sub-scheduler needs to know two things: when to kick in (the +entry-threshold), and how long to stay in control (the exit-threshold). +For example: when unifying three disks of 100GB, keeping an exact +balance of disk-usage is not necessary. Instead, there could be a 1GB +margin, which can be used to nicely balance other factors, such as +read-usage. The disk-usage scheduler can be told to kick in only when a +certain threshold of discrepancy is passed, such as 1GB. When it +assumes control under this condition, it will write all subsequent data +to the least-used volume. If it is doing so, it is unwise to stop right +after the values are below the entry-threshold again, since that would +make it very likely that the situation will occur again very soon. Such +a situation would cause the ALU to spend most of its time disk-usage +scheduling, which is unfair to the other sub-schedulers. The +exit-threshold therefore defines the amount of data that needs to be +written to the least-used disk, before control is relinquished again. + + In addition to the sub-schedulers, the ALU scheduler also has +"limits" options. These can stop the creation of new files on a volume +once values drop below a certain threshold. For example, setting +`option alu.limits.min-free-disk 5GB' will stop the scheduling of files +to volumes that have less than 5GB of free disk space, leaving the +files on that disk some room to grow. + + The actual values you assign to the thresholds for sub-schedulers and +limits depend on your situation. If you have fast-growing files, you'll +want to stop file-creation on a disk much earlier than when hardly any +of your files are growing. If you care less about disk-usage balance +than about read-usage balance, you'll want a bigger disk-usage +scheduler entry-threshold and a smaller read-usage scheduler +entry-threshold. + + For thresholds defining a size, values specifying "KB", "MB" and "GB" +are allowed. 
For example: `option alu.limits.min-free-disk 5GB'. + +`alu.order <order> * ("disk-usage:write-usage:read-usage:open-files-usage:disk-speed")' + +`alu.disk-usage.entry-threshold <size> (1GB)' + +`alu.disk-usage.exit-threshold <size> (512MB)' + +`alu.write-usage.entry-threshold <%> (25)' + +`alu.write-usage.exit-threshold <%> (5)' + +`alu.read-usage.entry-threshold <%> (25)' + +`alu.read-usage.exit-threshold <%> (5)' + +`alu.open-files-usage.entry-threshold <n> (1000)' + +`alu.open-files-usage.exit-threshold <n> (100)' + +`alu.limits.min-free-disk <%>' + +`alu.limits.max-open-files <n>' + +4.3.1.2 Round Robin (RR) +........................ + + option scheduler rr + + Round-Robin (RR) scheduler creates files in a round-robin fashion. +Each client will have its own round-robin loop. When your files are +mostly similar in size and I/O access pattern, this scheduler is a good +choice. RR scheduler checks for free disk space on the server before +scheduling, so you can know when to add another server node. The +default value of min-free-disk is 5% and is checked on file creation +calls, with at least 10 seconds (by default) elapsing between two checks. + + Options: +`rr.limits.min-free-disk <%> (5)' + Minimum free disk space a node must have for RR to schedule a file + to it. + +`rr.refresh-interval <t> (10 seconds)' + Time between two successive free disk space checks. + +4.3.1.3 Random +.............. + + option scheduler random + + The random scheduler schedules file creation randomly among its +child nodes. Like the round-robin scheduler, it also checks for a +minimum amount of free disk space before scheduling a file to a node. + +`random.limits.min-free-disk <%> (5)' + Minimum free disk space a node must have for random to schedule a + file to it. + +`random.refresh-interval <t> (10 seconds)' + Time between two successive free disk space checks. + +4.3.1.4 NUFA +............ 
+ + option scheduler nufa + + It is common in many GlusterFS computing environments for all +deployed machines to act as both servers and clients. For example, a +research lab may have 40 workstations each with its own storage. All of +these workstations might act as servers exporting a volume as well as +clients accessing the entire cluster's storage. In such a situation, +it makes sense to store locally created files on the local workstation +itself (assuming files are accessed most by the workstation that +created them). The Non-Uniform File Allocation (NUFA) scheduler +accomplishes that. + + NUFA gives the local system first priority for file creation over +other nodes. If the local volume does not have more free disk space +than a specified amount (5% by default) then NUFA schedules files among +the other child volumes in a round-robin fashion. + + NUFA is named after the similar strategy used for memory access, +NUMA(1). + +`nufa.limits.min-free-disk <%> (5)' + Minimum disk space that must be free (local or remote) for NUFA to + schedule a file to it. + +`nufa.refresh-interval <t> (10 seconds)' + Time between two successive free disk space checks. + +`nufa.local-volume-name <volume>' + The name of the volume corresponding to the local system. This + volume must be one of the children of the unify volume. This + option is mandatory. + +4.3.1.5 Namespace +................. + +Namespace volume needed because: - persistent inode numbers. - file +exists even when node is down. + + namespace files are simply touched. on every lookup it is checked. + +`namespace <volume> *' + Name of the namespace volume (which should be one of the unify + volume's children). + +`self-heal [on|off] (on)' + Enable/disable self-heal. Unless you know what you are doing, do + not disable self-heal. + +4.3.1.6 Self Heal +................. 
+ +* When a 'lookup()/stat()' call is made on a directory for the first +time, a self-heal call is made, which checks for the consistency of its +child nodes. If an entry is present in a storage node, but not in the +namespace, that entry is created in the namespace, and vice versa. There is +a writedir() API introduced which is used for the same. It also checks +for permissions, and uid/gid consistencies. + + * This check is also done when a server goes down and comes up. + + * If one starts with an empty namespace export, but has data in +storage nodes, a 'find .>/dev/null' or 'ls -lR >/dev/null' should help +to build namespace in one shot. Even otherwise, namespace is built on +demand when a file is looked up for the first time. + + NOTE: There are some issues (Kernel 'Oops' msgs) seen with +fuse-2.6.3, when someone deletes namespace in backend, when glusterfs is +running. But with fuse-2.6.5, this issue is not there. + + ---------- Footnotes ---------- + + (1) Non-Uniform Memory Access: +<http://en.wikipedia.org/wiki/Non-Uniform_Memory_Access> + + +File: user-guide.info, Node: Replicate, Next: Stripe, Prev: Unify, Up: Clustering Translators + +4.3.2 Replicate (formerly AFR) +------------------------------ + + type cluster/replicate + + Replicate provides RAID-1 like functionality for GlusterFS. +Replicate replicates files and directories across the subvolumes. Hence +if Replicate has four subvolumes, there will be four copies of all +files and directories. Replicate provides high-availability, i.e., in +case one of the subvolumes go down (e. g. server crash, network +disconnection) Replicate will still service the requests using the +redundant copies. + + Replicate also provides self-heal functionality, i.e., in case the +crashed servers come up, the outdated files and directories will be +updated with the latest versions. Replicate uses extended attributes of +the backend file system to track the versioning of files and +directories and provide the self-heal feature. 
+ + volume replicate-example + type cluster/replicate + subvolumes brick1 brick2 brick3 + end-volume + + This sample configuration will replicate all directories and files on +brick1, brick2 and brick3. + + All the read operations happen from the first alive child. If all the +three sub-volumes are up, reads will be done from brick1; if brick1 is +down read will be done from brick2. In case read() was being done on +brick1 and it goes down, replicate transparently falls back to brick2. + + The next release of GlusterFS will add the following features: + * Ability to specify the sub-volume from which read operations are + to be done (this will help users who have one of the sub-volumes + as a local storage volume). + + * Allow scheduling of read operations amongst the sub-volumes in a + round-robin fashion. + + The order of the subvolumes list should be same across all the +'replicate's as they will be used for locking purposes. + +4.3.2.1 Self Heal +................. + +Replicate has self-heal feature, which updates the outdated file and +directory copies by the most recent versions. For example consider the +following config: + + volume replicate-example + type cluster/replicate + subvolumes brick1 brick2 + end-volume + +4.3.2.2 File self-heal +...................... + +Now if we create a file foo.txt on replicate-example, the file will be +created on brick1 and brick2. The file will have two extended +attributes associated with it in the backend filesystem. One is +trusted.afr.createtime and the other is trusted.afr.version. The +trusted.afr.createtime xattr has the create time (in terms of seconds +since epoch) and trusted.afr.version is a number that is incremented +each time a file is modified. This increment happens during close +(incase any write was done before close). + + If brick1 goes down, we edit foo.txt the version gets incremented. +Now the brick1 comes back up, when we open() on foo.txt replicate will +check if their versions are same. 
If they are not same, the outdated +copy is replaced by the latest copy and its version is updated. After +the sync the open() proceeds in the usual manner and the application +calling open() can continue on its access to the file. + + If brick1 goes down, we delete foo.txt and create a file with the +same name again i.e foo.txt. Now brick1 comes back up, clearly there is +a chance that the version on brick1 being more than the version on +brick2, this is where createtime extended attribute helps in deciding +which the outdated copy is. Hence we need to consider both createtime +and version to decide on the latest copy. + + The version attribute is incremented during the close() call. Version +will not be incremented in case there was no write() done. In case the +fd that the close() gets was got by create() call, we also create the +createtime extended attribute. + +4.3.2.3 Directory self-heal +........................... + +Suppose brick1 goes down, we delete foo.txt, brick1 comes back up, now +we should not create foo.txt on brick2 but we should delete foo.txt on +brick1. We handle this situation by having the createtime and version +attribute on the directory similar to the file. when lookup() is done +on the directory, we compare the createtime/version attributes of the +copies and see which files needs to be deleted and delete those files +and update the extended attributes of the outdated directory copy. +Each time a directory is modified (a file or a subdirectory is created +or deleted inside the directory) and one of the subvols is down, we +increment the directory's version. + + lookup() is a call initiated by the kernel on a file or directory +just before any access to that file or directory. In glusterfs, by +default, lookup() will not be called in case it was called in the past +one second on that particular file or directory. + + The extended attributes can be seen in the backend filesystem using +the `getfattr' command. 
(`getfattr -n trusted.afr.version <file>') + +`debug [on|off] (off)' + +`self-heal [on|off] (on)' + +`replicate <pattern> (*:1)' + +`lock-node <child_volume> (first child is used by default)' + + +File: user-guide.info, Node: Stripe, Prev: Replicate, Up: Clustering Translators + +4.3.3 Stripe +------------ + + type cluster/stripe + + The stripe translator distributes the contents of a file over its +sub-volumes. It does this by creating a file equal in size to the +total size of the file on each of its sub-volumes. It then writes only +a part of the file to each sub-volume, leaving the rest of it empty. +These empty regions are called `holes' in Unix terminology. The holes +do not consume any disk space. + + The diagram below makes this clear. + + + +You can configure stripe so that only filenames matching a pattern are +striped. You can also configure the size of the data to be stored on +each sub-volume. + +`block-size <pattern>:<size> (*:0 no striping)' + Distribute files matching `<pattern>' over the sub-volumes, + storing at least `<size>' on each sub-volume. For example, + + option block-size *.mpg:1M + + distributes all files ending in `.mpg', storing at least 1 MB on + each sub-volume. + + Any number of `block-size' option lines may be present, specifying + different sizes for different file name patterns. + + +File: user-guide.info, Node: Performance Translators, Next: Features Translators, Prev: Clustering Translators, Up: Translators + +4.4 Performance Translators +=========================== + +* Menu: + +* Read Ahead:: +* Write Behind:: +* IO Threads:: +* IO Cache:: +* Booster:: + + +File: user-guide.info, Node: Read Ahead, Next: Write Behind, Up: Performance Translators + +4.4.1 Read Ahead +---------------- + + type performance/read-ahead + + The read-ahead translator pre-fetches data in advance on every read. 
+This benefits applications that mostly process files in sequential +order, since the next block of data will already be available by the +time the application is done with the current one. + + Additionally, the read-ahead translator also behaves as a +read-aggregator. Many small read operations are combined and issued as +fewer, larger read requests to the server. + + Read-ahead deals in "pages" as the unit of data fetched. The page +size is configurable, as is the "page count", which is the number of +pages that are pre-fetched. + + Read-ahead is best used with InfiniBand (using the ib-verbs +transport). On FastEthernet and Gigabit Ethernet networks, GlusterFS +can achieve the link-maximum throughput even without read-ahead, making +it quite superfluous. + + Note that read-ahead only happens if the reads are perfectly +sequential. If your application accesses data in a random fashion, +using read-ahead might actually lead to a performance loss, since +read-ahead will pointlessly fetch pages which won't be used by the +application. + + Options: +`page-size <n> (256KB)' + The unit of data that is pre-fetched. + +`page-count <n> (2)' + The number of pages that are pre-fetched. + +`force-atime-update [on|off|yes|no] (off|no)' + Whether to force an access time (atime) update on the file on + every read. Without this, the atime will be slightly imprecise, as + it will reflect the time when the read-ahead translator read the + data, not when the application actually read it. + + +File: user-guide.info, Node: Write Behind, Next: IO Threads, Prev: Read Ahead, Up: Performance Translators + +4.4.2 Write Behind +------------------ + + type performance/write-behind + + The write-behind translator improves the latency of a write +operation. It does this by relegating the write operation to the +background and returning to the application even as the write is in +progress. Using the write-behind translator, successive write requests +can be pipelined. 
This mode of write-behind operation is best used on +the client side, to enable decreased write latency for the application. + + The write-behind translator can also aggregate write requests. If the +`aggregate-size' option is specified, then successive writes upto that +size are accumulated and written in a single operation. This mode of +operation is best used on the server side, as this will decrease the +disk's head movement when multiple files are being written to in +parallel. + + The `aggregate-size' option has a default value of 128KB. Although +this works well for most users, you should always experiment with +different values to determine the one that will deliver maximum +performance. This is because the performance of write-behind depends on +your interconnect, size of RAM, and the work load. + +`aggregate-size <n> (128KB)' + Amount of data to accumulate before doing a write + +`flush-behind [on|yes|off|no] (off|no)' + + +File: user-guide.info, Node: IO Threads, Next: IO Cache, Prev: Write Behind, Up: Performance Translators + +4.4.3 IO Threads +---------------- + + type performance/io-threads + + The IO threads translator is intended to increase the responsiveness +of the server to metadata operations by doing file I/O (read, write) in +a background thread. Since the GlusterFS server is single-threaded, +using the IO threads translator can significantly improve performance. +This translator is best used on the server side, loaded just below the +server protocol translator. + + IO threads operates by handing out read and write requests to a +separate thread. The total number of threads in existence at a time is +constant, and configurable. + +`thread-count <n> (1)' + Number of threads to use. + + +File: user-guide.info, Node: IO Cache, Next: Booster, Prev: IO Threads, Up: Performance Translators + +4.4.4 IO Cache +-------------- + + type performance/io-cache + + The IO cache translator caches data that has been read. 
This is +useful if many applications read the same data multiple times, and if +reads are much more frequent than writes (for example, IO caching may be +useful in a web hosting environment, where most clients will simply +read some files and only a few will write to them). + + The IO cache translator reads data from its child in `page-size' +chunks. It caches data upto `cache-size' bytes. The cache is +maintained as a prioritized least-recently-used (LRU) list, with +priorities determined by user-specified patterns to match filenames. + + When the IO cache translator detects a write operation, the cache +for that file is flushed. + + The IO cache translator periodically verifies the consistency of +cached data, using the modification times on the files. The +verification timeout is configurable. + +`page-size <n> (128KB)' + Size of a page. + +`cache-size (n) (32MB)' + Total amount of data to be cached. + +`force-revalidate-timeout <n> (1)' + Timeout to force a cache consistency verification, in seconds. + +`priority <pattern> (*:0)' + Filename patterns listed in order of priority. + + +File: user-guide.info, Node: Booster, Prev: IO Cache, Up: Performance Translators + +4.4.5 Booster +------------- + + type performance/booster + + The booster translator gives applications a faster path to +communicate read and write requests to GlusterFS. Normally, all +requests to GlusterFS from applications go through FUSE, as indicated +in *note Filesystems in Userspace::. Using the booster translator in +conjunction with the GlusterFS booster shared library, an application +can bypass the FUSE path and send read/write requests directly to the +GlusterFS client process. + + The booster mechanism consists of two parts: the booster translator, +and the booster shared library. The booster translator is meant to be +loaded on the client side, usually at the root of the translator tree. +The booster shared library should be `LD_PRELOAD'ed with the +application. 
+ + The booster translator when loaded opens a Unix domain socket and +listens for read/write requests on it. The booster shared library +intercepts read and write system calls and sends the requests to the +GlusterFS process directly using the Unix domain socket, bypassing FUSE. +This leads to superior performance. + + Once you've loaded the booster translator in your volume +specification file, you can start your application as: + + $ LD_PRELOAD=/usr/local/bin/glusterfs-booster.so your_app + + The booster translator accepts no options. + + +File: user-guide.info, Node: Features Translators, Next: Miscellaneous Translators, Prev: Performance Translators, Up: Translators + +4.5 Features Translators +======================== + +* Menu: + +* POSIX Locks:: +* Fixed ID:: + + +File: user-guide.info, Node: POSIX Locks, Next: Fixed ID, Up: Features Translators + +4.5.1 POSIX Locks +----------------- + + type features/posix-locks + + This translator provides storage independent POSIX record locking +support (`fcntl' locking). Typically you'll want to load this on the +server side, just above the POSIX storage translator. Using this +translator you can get both advisory locking and mandatory locking +support. It also handles `flock()' locks properly. + + Caveat: Consider a file that does not have its mandatory locking bits +(+setgid, -group execution) turned on. Assume that this file is now +opened by a process on a client that has the write-behind xlator +loaded. The write-behind xlator does not cache anything for files which +have mandatory locking enabled, to avoid incoherence. Let's say that +mandatory locking is now enabled on this file through another client. +The former client will not know about this change, and write-behind may +erroneously report a write as being successful when in fact it would +fail due to the region it is writing to being locked. + + There seems to be no easy way to fix this. 
To work around this +problem, it is recommended that you never enable the mandatory bits on +a file while it is open. + +`mandatory [on|off] (on)' + Turns mandatory locking on. + + +File: user-guide.info, Node: Fixed ID, Prev: POSIX Locks, Up: Features Translators + +4.5.2 Fixed ID +-------------- + + type features/fixed-id + + The fixed ID translator makes all filesystem requests from the client +to appear to be coming from a fixed, specified UID/GID, regardless of +which user actually initiated the request. + +`fixed-uid <n> [if not set, not used]' + The UID to send to the server + +`fixed-gid <n> [if not set, not used]' + The GID to send to the server + + +File: user-guide.info, Node: Miscellaneous Translators, Prev: Features Translators, Up: Translators + +4.6 Miscellaneous Translators +============================= + +* Menu: + +* ROT-13:: +* Trace:: + + +File: user-guide.info, Node: ROT-13, Next: Trace, Up: Miscellaneous Translators + +4.6.1 ROT-13 +------------ + + type encryption/rot-13 + + ROT-13 is a toy translator that can "encrypt" and "decrypt" file +contents using the ROT-13 algorithm. ROT-13 is a trivial algorithm that +rotates each alphabet by thirteen places. Thus, 'A' becomes 'N', 'B' +becomes 'O', and 'Z' becomes 'M'. + + It goes without saying that you shouldn't use this translator if you +need _real_ encryption (a future release of GlusterFS will have real +encryption translators). + +`encrypt-write [on|off] (on)' + Whether to encrypt on write + +`decrypt-read [on|off] (on)' + Whether to decrypt on read + + +File: user-guide.info, Node: Trace, Prev: ROT-13, Up: Miscellaneous Translators + +4.6.2 Trace +----------- + + type debug/trace + + The trace translator is intended for debugging purposes. When +loaded, it logs all the system calls received by the server or client +(wherever trace is loaded), their arguments, and the results. You must +use a GlusterFS log level of DEBUG (See *note Running GlusterFS::) for +trace to work. 
+ + Sample trace output (lines have been wrapped for readability): + 2007-10-30 00:08:58 D [trace.c:1579:trace_opendir] trace: callid: 68 + (*this=0x8059e40, loc=0x8091984 {path=/iozone3_283, inode=0x8091f00}, + fd=0x8091d50) + + 2007-10-30 00:08:58 D [trace.c:630:trace_opendir_cbk] trace: + (*this=0x8059e40, op_ret=4, op_errno=1, fd=0x8091d50) + + 2007-10-30 00:08:58 D [trace.c:1602:trace_readdir] trace: callid: 69 + (*this=0x8059e40, size=4096, offset=0 fd=0x8091d50) + + 2007-10-30 00:08:58 D [trace.c:215:trace_readdir_cbk] trace: + (*this=0x8059e40, op_ret=0, op_errno=0, count=4) + + 2007-10-30 00:08:58 D [trace.c:1624:trace_closedir] trace: callid: 71 + (*this=0x8059e40, *fd=0x8091d50) + + 2007-10-30 00:08:58 D [trace.c:809:trace_closedir_cbk] trace: + (*this=0x8059e40, op_ret=0, op_errno=1) + + +File: user-guide.info, Node: Usage Scenarios, Next: Troubleshooting, Prev: Translators, Up: Top + +5 Usage Scenarios +***************** + +5.1 Advanced Striping +===================== + +This section is based on the Advanced Striping tutorial written by +Anand Avati on the GlusterFS wiki (1). + +5.1.1 Mixed Storage Requirements +-------------------------------- + +There are two ways of scheduling the I/O. One at file level (using +unify translator) and other at block level (using stripe translator). +Striped I/O is good for files that are potentially large and require +high parallel throughput (for example, a single file of 400GB being +accessed by 100s and 1000s of systems simultaneously and randomly). For +most of the cases, file level scheduling works best. + + In the real world, it is desirable to mix file level and block level +scheduling on a single storage volume. Alternatively users can choose +to have two separate volumes and hence two mount points, but the +applications may demand a single storage system to host both. + + This document explains how to mix file level scheduling with stripe. 
+ +5.1.2 Configuration Brief +------------------------- + +This setup demonstrates how users can configure unify translator with +appropriate I/O scheduler for file level scheduling and stripe for only +matching patterns. This way, GlusterFS chooses appropriate I/O profile +and knows how to efficiently handle both the types of data. + + A simple technique to achieve this effect is to create a stripe set +of unify and stripe blocks, where unify is the first sub-volume. Files +that do not match the stripe policy are passed on to the first unify +sub-volume and in turn scheduled across the cluster using its file level I/O +scheduler. + + 5.1.3 Preparing GlusterFS Environment +------------------------------------- + +Create the directories /export/for-namespace, /export/for-unify and +/export/for-stripe on all the storage bricks. + + Place the following server and client volume spec file under +/etc/glusterfs (or appropriate installed path) and replace the IP +addresses / access control fields to match your environment. 
+ + ## file: /etc/glusterfs/glusterfsd.vol + volume posix-unify + type storage/posix + option directory /export/for-unify + end-volume + + volume posix-stripe + type storage/posix + option directory /export/for-stripe + end-volume + + volume posix-namespace + type storage/posix + option directory /export/for-namespace + end-volume + + volume server + type protocol/server + option transport-type tcp + option auth.addr.posix-unify.allow 192.168.1.* + option auth.addr.posix-stripe.allow 192.168.1.* + option auth.addr.posix-namespace.allow 192.168.1.* + subvolumes posix-unify posix-stripe posix-namespace + end-volume + + ## file: /etc/glusterfs/glusterfs.vol + volume client-namespace + type protocol/client + option transport-type tcp + option remote-host 192.168.1.1 + option remote-subvolume posix-namespace + end-volume + + volume client-unify-1 + type protocol/client + option transport-type tcp + option remote-host 192.168.1.1 + option remote-subvolume posix-unify + end-volume + + volume client-unify-2 + type protocol/client + option transport-type tcp + option remote-host 192.168.1.2 + option remote-subvolume posix-unify + end-volume + + volume client-unify-3 + type protocol/client + option transport-type tcp + option remote-host 192.168.1.3 + option remote-subvolume posix-unify + end-volume + + volume client-unify-4 + type protocol/client + option transport-type tcp + option remote-host 192.168.1.4 + option remote-subvolume posix-unify + end-volume + + volume client-stripe-1 + type protocol/client + option transport-type tcp + option remote-host 192.168.1.1 + option remote-subvolume posix-stripe + end-volume + + volume client-stripe-2 + type protocol/client + option transport-type tcp + option remote-host 192.168.1.2 + option remote-subvolume posix-stripe + end-volume + + volume client-stripe-3 + type protocol/client + option transport-type tcp + option remote-host 192.168.1.3 + option remote-subvolume posix-stripe + end-volume + + volume client-stripe-4 + type 
protocol/client + option transport-type tcp + option remote-host 192.168.1.4 + option remote-subvolume posix-stripe + end-volume + + volume unify + type cluster/unify + option scheduler rr + subvolumes client-unify-1 client-unify-2 client-unify-3 client-unify-4 + end-volume + + volume stripe + type cluster/stripe + option block-size *.img:2MB # All files ending with .img are striped with 2MB stripe block size. + subvolumes unify client-stripe-1 client-stripe-2 client-stripe-3 client-stripe-4 + end-volume + + Bring up the Storage + + Starting GlusterFS Server: If you have installed through binary +package, you can start the service through init.d startup script. If +not: + + [root@server]# glusterfsd + + Mounting GlusterFS Volumes: + + [root@client]# glusterfs -s [BRICK-IP-ADDRESS] /mnt/cluster + + Improving upon this Setup + + Infiniband Verbs RDMA transport is much faster than TCP/IP GigE +transport. + + Use of performance translators such as read-ahead, write-behind, +io-cache, io-threads, booster is recommended. + + Replace round-robin (rr) scheduler with ALU to handle more dynamic +storage environments. + + ---------- Footnotes ---------- + + (1) +http://gluster.org/docs/index.php/Mixing_Striped_and_Regular_Files + + +File: user-guide.info, Node: Troubleshooting, Next: GNU Free Documentation Licence, Prev: Usage Scenarios, Up: Top + +6 Troubleshooting +***************** + +This chapter is a general troubleshooting guide to GlusterFS. It lists +common GlusterFS server and client error messages, debugging hints, and +concludes with the suggested procedure to report bugs in GlusterFS. + +6.1 GlusterFS error messages +============================ + +6.1.1 Server errors +------------------- + + glusterfsd: FATAL: could not open specfile: + '/etc/glusterfs/glusterfsd.vol' + + The GlusterFS server expects the volume specification file to be at +`/etc/glusterfs/glusterfsd.vol'. 
The example specification file will be +installed as `/etc/glusterfs/glusterfsd.vol.sample'. You need to edit +it and rename it, or provide a different specification file using the +`--spec-file' command line option (See *note Server::). + + gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfsd.log" + (Permission denied) + + You don't have permission to create files in the +`/usr/var/log/glusterfs' directory. Make sure you are running GlusterFS +as root. Alternatively, specify a different path for the log file using +the `--log-file' option (See *note Server::). + +6.1.2 Client errors +------------------- + + fusermount: failed to access mountpoint /mnt: + Transport endpoint is not connected + + A previous failed (or hung) mount of GlusterFS is preventing it from +being mounted again in the same location. The fix is to do: + + # umount /mnt + + and try mounting again. + + *"Transport endpoint is not connected".* + + If you get this error when you try a command such as `ls' or `cat', +it means the GlusterFS mount did not succeed. Try running GlusterFS in +`DEBUG' logging level and study the log messages to discover the cause. + + *"Connect to server failed", "SERVER-ADDRESS: Connection refused".* + + The GlusterFS server is not running or is dead. Check your network +connections and firewall settings. To check if the server is reachable, +try: + + telnet IP-ADDRESS 24007 + + If the server is accessible, your `telnet' command should connect and +block. If not you will see an error message such as `telnet: Unable to +connect to remote host: Connection refused'. 24007 is the default +GlusterFS port. If you have changed it, then use the corresponding port +instead. + + gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfs.log" + (Permission denied) + + You don't have permission to create files in the +`/usr/var/log/glusterfs' directory. Make sure you are running GlusterFS +as root. 
Alternatively, specify a different path for the log file using +the `--log-file' option (See *note Client::). + +6.2 FUSE error messages +======================= + +`modprobe fuse' fails with: "Unknown symbol in module, or unknown +parameter". + + If you are using fuse-2.6.x on Redhat Enterprise Linux Work Station 4 +and Advanced Server 4 with 2.6.9-42.ELlargesmp, 2.6.9-42.ELsmp, +2.6.9-42.EL kernels and get this error while loading FUSE kernel +module, you need to apply the following patch. + + For fuse-2.6.2: + +<http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.2-rhel-build.patch> + + For fuse-2.6.3: + +<http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.3-rhel-build.patch> + +6.3 AppArmour and GlusterFS +=========================== + +Under OpenSuSE GNU/Linux, the AppArmour security feature does not allow +GlusterFS to create temporary files or network socket connections even +while running as root. You will see error messages like `Unable to open +log file: Operation not permitted' or `Connection refused'. Disabling +AppArmour using YaST or properly configuring AppArmour to recognize +`glusterfsd' or `glusterfs'/`fusermount' should solve the problem. + +6.4 Reporting a bug +=================== + +If you encounter a bug in GlusterFS, please follow the below guidelines +when you report it to the mailing list. Be sure to report it! User +feedback is crucial to the health of the project and we value it highly. + +6.4.1 General instructions +-------------------------- + +When running GlusterFS in a non-production environment, be sure to +build it with the following command: + + $ make CFLAGS='-g -O0 -DDEBUG' + + This includes debugging information which will be helpful in getting +backtraces (see below) and also disables optimization. Enabling +optimization can result in incorrect line numbers being reported to gdb. 
+ +6.4.2 Volume specification files +-------------------------------- + +Attach all relevant server and client spec files you were using when +you encountered the bug. Also tell us details of your setup, i.e., how +many clients and how many servers. + +6.4.3 Log files +--------------- + +Set the loglevel of your client and server programs to DEBUG (by +passing the -L DEBUG option) and attach the log files with your bug +report. Obviously, if only the client is failing (for example), you +only need to send us the client log file. + +6.4.4 Backtrace +--------------- + +If GlusterFS has encountered a segmentation fault or has crashed for +some other reason, include the backtrace with the bug report. You can +get the backtrace using the following procedure. + + Run the GlusterFS client or server inside gdb. + + $ gdb ./glusterfs + (gdb) set args -f client.spec -N -l/path/to/log/file -LDEBUG /mnt/point + (gdb) run + + Now when the process segfaults, you can get the backtrace by typing: + + (gdb) bt + + If the GlusterFS process has crashed and dumped a core file (you can +find this in / if running as a daemon and in the current directory +otherwise), you can do: + + $ gdb /path/to/glusterfs /path/to/core.<pid> + + and then get the backtrace. + + If the GlusterFS server or client seems to be hung, then you can get +the backtrace by attaching gdb to the process. First get the `PID' of +the process (using ps), and then do: + + $ gdb ./glusterfs <pid> + + Press Ctrl-C to interrupt the process and then generate the +backtrace. + +6.4.5 Reproducing the bug +------------------------- + +If the bug is reproducible, please include the steps necessary to do +so. If the bug is not reproducible, send us the bug report anyway. + +6.4.6 Other information +----------------------- + +If you think it is relevant, send us also the version of FUSE you're +using, the kernel version, platform. 
+ + +File: user-guide.info, Node: GNU Free Documentation Licence, Next: Index, Prev: Troubleshooting, Up: Top + +Appendix A GNU Free Documentation Licence +***************************************** + + Version 1.2, November 2002 + + Copyright (C) 2000,2001,2002 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA + + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + 0. PREAMBLE + + The purpose of this License is to make a manual, textbook, or other + functional and useful document "free" in the sense of freedom: to + assure everyone the effective freedom to copy and redistribute it, + with or without modifying it, either commercially or + noncommercially. Secondarily, this License preserves for the + author and publisher a way to get credit for their work, while not + being considered responsible for modifications made by others. + + This License is a kind of "copyleft", which means that derivative + works of the document must themselves be free in the same sense. + It complements the GNU General Public License, which is a copyleft + license designed for free software. + + We have designed this License in order to use it for manuals for + free software, because free software needs free documentation: a + free program should come with manuals providing the same freedoms + that the software does. But this License is not limited to + software manuals; it can be used for any textual work, regardless + of subject matter or whether it is published as a printed book. + We recommend this License principally for works whose purpose is + instruction or reference. + + 1. APPLICABILITY AND DEFINITIONS + + This License applies to any manual or other work, in any medium, + that contains a notice placed by the copyright holder saying it + can be distributed under the terms of this License. 
Such a notice + grants a world-wide, royalty-free license, unlimited in duration, + to use that work under the conditions stated herein. The + "Document", below, refers to any such manual or work. Any member + of the public is a licensee, and is addressed as "you". You + accept the license if you copy, modify or distribute the work in a + way requiring permission under copyright law. + + A "Modified Version" of the Document means any work containing the + Document or a portion of it, either copied verbatim, or with + modifications and/or translated into another language. + + A "Secondary Section" is a named appendix or a front-matter section + of the Document that deals exclusively with the relationship of the + publishers or authors of the Document to the Document's overall + subject (or to related matters) and contains nothing that could + fall directly within that overall subject. (Thus, if the Document + is in part a textbook of mathematics, a Secondary Section may not + explain any mathematics.) The relationship could be a matter of + historical connection with the subject or with related matters, or + of legal, commercial, philosophical, ethical or political position + regarding them. + + The "Invariant Sections" are certain Secondary Sections whose + titles are designated, as being those of Invariant Sections, in + the notice that says that the Document is released under this + License. If a section does not fit the above definition of + Secondary then it is not allowed to be designated as Invariant. + The Document may contain zero Invariant Sections. If the Document + does not identify any Invariant Sections then there are none. + + The "Cover Texts" are certain short passages of text that are + listed, as Front-Cover Texts or Back-Cover Texts, in the notice + that says that the Document is released under this License. A + Front-Cover Text may be at most 5 words, and a Back-Cover Text may + be at most 25 words. 
+ + A "Transparent" copy of the Document means a machine-readable copy, + represented in a format whose specification is available to the + general public, that is suitable for revising the document + straightforwardly with generic text editors or (for images + composed of pixels) generic paint programs or (for drawings) some + widely available drawing editor, and that is suitable for input to + text formatters or for automatic translation to a variety of + formats suitable for input to text formatters. A copy made in an + otherwise Transparent file format whose markup, or absence of + markup, has been arranged to thwart or discourage subsequent + modification by readers is not Transparent. An image format is + not Transparent if used for any substantial amount of text. A + copy that is not "Transparent" is called "Opaque". + + Examples of suitable formats for Transparent copies include plain + ASCII without markup, Texinfo input format, LaTeX input format, + SGML or XML using a publicly available DTD, and + standard-conforming simple HTML, PostScript or PDF designed for + human modification. Examples of transparent image formats include + PNG, XCF and JPG. Opaque formats include proprietary formats that + can be read and edited only by proprietary word processors, SGML or + XML for which the DTD and/or processing tools are not generally + available, and the machine-generated HTML, PostScript or PDF + produced by some word processors for output purposes only. + + The "Title Page" means, for a printed book, the title page itself, + plus such following pages as are needed to hold, legibly, the + material this License requires to appear in the title page. For + works in formats which do not have any title page as such, "Title + Page" means the text near the most prominent appearance of the + work's title, preceding the beginning of the body of the text. 
+ + A section "Entitled XYZ" means a named subunit of the Document + whose title either is precisely XYZ or contains XYZ in parentheses + following text that translates XYZ in another language. (Here XYZ + stands for a specific section name mentioned below, such as + "Acknowledgements", "Dedications", "Endorsements", or "History".) + To "Preserve the Title" of such a section when you modify the + Document means that it remains a section "Entitled XYZ" according + to this definition. + + The Document may include Warranty Disclaimers next to the notice + which states that this License applies to the Document. These + Warranty Disclaimers are considered to be included by reference in + this License, but only as regards disclaiming warranties: any other + implication that these Warranty Disclaimers may have is void and + has no effect on the meaning of this License. + + 2. VERBATIM COPYING + + You may copy and distribute the Document in any medium, either + commercially or noncommercially, provided that this License, the + copyright notices, and the license notice saying this License + applies to the Document are reproduced in all copies, and that you + add no other conditions whatsoever to those of this License. You + may not use technical measures to obstruct or control the reading + or further copying of the copies you make or distribute. However, + you may accept compensation in exchange for copies. If you + distribute a large enough number of copies you must also follow + the conditions in section 3. + + You may also lend copies, under the same conditions stated above, + and you may publicly display copies. + + 3. 
COPYING IN QUANTITY + + If you publish printed copies (or copies in media that commonly + have printed covers) of the Document, numbering more than 100, and + the Document's license notice requires Cover Texts, you must + enclose the copies in covers that carry, clearly and legibly, all + these Cover Texts: Front-Cover Texts on the front cover, and + Back-Cover Texts on the back cover. Both covers must also clearly + and legibly identify you as the publisher of these copies. The + front cover must present the full title with all words of the + title equally prominent and visible. You may add other material + on the covers in addition. Copying with changes limited to the + covers, as long as they preserve the title of the Document and + satisfy these conditions, can be treated as verbatim copying in + other respects. + + If the required texts for either cover are too voluminous to fit + legibly, you should put the first ones listed (as many as fit + reasonably) on the actual cover, and continue the rest onto + adjacent pages. + + If you publish or distribute Opaque copies of the Document + numbering more than 100, you must either include a + machine-readable Transparent copy along with each Opaque copy, or + state in or with each Opaque copy a computer-network location from + which the general network-using public has access to download + using public-standard network protocols a complete Transparent + copy of the Document, free of added material. If you use the + latter option, you must take reasonably prudent steps, when you + begin distribution of Opaque copies in quantity, to ensure that + this Transparent copy will remain thus accessible at the stated + location until at least one year after the last time you + distribute an Opaque copy (directly or through your agents or + retailers) of that edition to the public. 
+ + It is requested, but not required, that you contact the authors of + the Document well before redistributing any large number of + copies, to give them a chance to provide you with an updated + version of the Document. + + 4. MODIFICATIONS + + You may copy and distribute a Modified Version of the Document + under the conditions of sections 2 and 3 above, provided that you + release the Modified Version under precisely this License, with + the Modified Version filling the role of the Document, thus + licensing distribution and modification of the Modified Version to + whoever possesses a copy of it. In addition, you must do these + things in the Modified Version: + + A. Use in the Title Page (and on the covers, if any) a title + distinct from that of the Document, and from those of + previous versions (which should, if there were any, be listed + in the History section of the Document). You may use the + same title as a previous version if the original publisher of + that version gives permission. + + B. List on the Title Page, as authors, one or more persons or + entities responsible for authorship of the modifications in + the Modified Version, together with at least five of the + principal authors of the Document (all of its principal + authors, if it has fewer than five), unless they release you + from this requirement. + + C. State on the Title page the name of the publisher of the + Modified Version, as the publisher. + + D. Preserve all the copyright notices of the Document. + + E. Add an appropriate copyright notice for your modifications + adjacent to the other copyright notices. + + F. Include, immediately after the copyright notices, a license + notice giving the public permission to use the Modified + Version under the terms of this License, in the form shown in + the Addendum below. + + G. Preserve in that license notice the full lists of Invariant + Sections and required Cover Texts given in the Document's + license notice. + + H. 
Include an unaltered copy of this License. + + I. Preserve the section Entitled "History", Preserve its Title, + and add to it an item stating at least the title, year, new + authors, and publisher of the Modified Version as given on + the Title Page. If there is no section Entitled "History" in + the Document, create one stating the title, year, authors, + and publisher of the Document as given on its Title Page, + then add an item describing the Modified Version as stated in + the previous sentence. + + J. Preserve the network location, if any, given in the Document + for public access to a Transparent copy of the Document, and + likewise the network locations given in the Document for + previous versions it was based on. These may be placed in + the "History" section. You may omit a network location for a + work that was published at least four years before the + Document itself, or if the original publisher of the version + it refers to gives permission. + + K. For any section Entitled "Acknowledgements" or "Dedications", + Preserve the Title of the section, and preserve in the + section all the substance and tone of each of the contributor + acknowledgements and/or dedications given therein. + + L. Preserve all the Invariant Sections of the Document, + unaltered in their text and in their titles. Section numbers + or the equivalent are not considered part of the section + titles. + + M. Delete any section Entitled "Endorsements". Such a section + may not be included in the Modified Version. + + N. Do not retitle any existing section to be Entitled + "Endorsements" or to conflict in title with any Invariant + Section. + + O. Preserve any Warranty Disclaimers. + + If the Modified Version includes new front-matter sections or + appendices that qualify as Secondary Sections and contain no + material copied from the Document, you may at your option + designate some or all of these sections as invariant. 
To do this, + add their titles to the list of Invariant Sections in the Modified + Version's license notice. These titles must be distinct from any + other section titles. + + You may add a section Entitled "Endorsements", provided it contains + nothing but endorsements of your Modified Version by various + parties--for example, statements of peer review or that the text + has been approved by an organization as the authoritative + definition of a standard. + + You may add a passage of up to five words as a Front-Cover Text, + and a passage of up to 25 words as a Back-Cover Text, to the end + of the list of Cover Texts in the Modified Version. Only one + passage of Front-Cover Text and one of Back-Cover Text may be + added by (or through arrangements made by) any one entity. If the + Document already includes a cover text for the same cover, + previously added by you or by arrangement made by the same entity + you are acting on behalf of, you may not add another; but you may + replace the old one, on explicit permission from the previous + publisher that added the old one. + + The author(s) and publisher(s) of the Document do not by this + License give permission to use their names for publicity for or to + assert or imply endorsement of any Modified Version. + + 5. COMBINING DOCUMENTS + + You may combine the Document with other documents released under + this License, under the terms defined in section 4 above for + modified versions, provided that you include in the combination + all of the Invariant Sections of all of the original documents, + unmodified, and list them all as Invariant Sections of your + combined work in its license notice, and that you preserve all + their Warranty Disclaimers. + + The combined work need only contain one copy of this License, and + multiple identical Invariant Sections may be replaced with a single + copy. 
If there are multiple Invariant Sections with the same name + but different contents, make the title of each such section unique + by adding at the end of it, in parentheses, the name of the + original author or publisher of that section if known, or else a + unique number. Make the same adjustment to the section titles in + the list of Invariant Sections in the license notice of the + combined work. + + In the combination, you must combine any sections Entitled + "History" in the various original documents, forming one section + Entitled "History"; likewise combine any sections Entitled + "Acknowledgements", and any sections Entitled "Dedications". You + must delete all sections Entitled "Endorsements." + + 6. COLLECTIONS OF DOCUMENTS + + You may make a collection consisting of the Document and other + documents released under this License, and replace the individual + copies of this License in the various documents with a single copy + that is included in the collection, provided that you follow the + rules of this License for verbatim copying of each of the + documents in all other respects. + + You may extract a single document from such a collection, and + distribute it individually under this License, provided you insert + a copy of this License into the extracted document, and follow + this License in all other respects regarding verbatim copying of + that document. + + 7. AGGREGATION WITH INDEPENDENT WORKS + + A compilation of the Document or its derivatives with other + separate and independent documents or works, in or on a volume of + a storage or distribution medium, is called an "aggregate" if the + copyright resulting from the compilation is not used to limit the + legal rights of the compilation's users beyond what the individual + works permit. When the Document is included in an aggregate, this + License does not apply to the other works in the aggregate which + are not themselves derivative works of the Document. 
+ + If the Cover Text requirement of section 3 is applicable to these + copies of the Document, then if the Document is less than one half + of the entire aggregate, the Document's Cover Texts may be placed + on covers that bracket the Document within the aggregate, or the + electronic equivalent of covers if the Document is in electronic + form. Otherwise they must appear on printed covers that bracket + the whole aggregate. + + 8. TRANSLATION + + Translation is considered a kind of modification, so you may + distribute translations of the Document under the terms of section + 4. Replacing Invariant Sections with translations requires special + permission from their copyright holders, but you may include + translations of some or all Invariant Sections in addition to the + original versions of these Invariant Sections. You may include a + translation of this License, and all the license notices in the + Document, and any Warranty Disclaimers, provided that you also + include the original English version of this License and the + original versions of those notices and disclaimers. In case of a + disagreement between the translation and the original version of + this License or a notice or disclaimer, the original version will + prevail. + + If a section in the Document is Entitled "Acknowledgements", + "Dedications", or "History", the requirement (section 4) to + Preserve its Title (section 1) will typically require changing the + actual title. + + 9. TERMINATION + + You may not copy, modify, sublicense, or distribute the Document + except as expressly provided for under this License. Any other + attempt to copy, modify, sublicense or distribute the Document is + void, and will automatically terminate your rights under this + License. However, parties who have received copies, or rights, + from you under this License will not have their licenses + terminated so long as such parties remain in full compliance. + + 10. 
FUTURE REVISIONS OF THIS LICENSE + + The Free Software Foundation may publish new, revised versions of + the GNU Free Documentation License from time to time. Such new + versions will be similar in spirit to the present version, but may + differ in detail to address new problems or concerns. See + `http://www.gnu.org/copyleft/'. + + Each version of the License is given a distinguishing version + number. If the Document specifies that a particular numbered + version of this License "or any later version" applies to it, you + have the option of following the terms and conditions either of + that specified version or of any later version that has been + published (not as a draft) by the Free Software Foundation. If + the Document does not specify a version number of this License, + you may choose any version ever published (not as a draft) by the + Free Software Foundation. + +A.0.1 ADDENDUM: How to use this License for your documents +---------------------------------------------------------- + +To use this License in a document you have written, include a copy of +the License in the document and put the following copyright and license +notices just after the title page: + + Copyright (C) YEAR YOUR NAME. + Permission is granted to copy, distribute and/or modify this document + under the terms of the GNU Free Documentation License, Version 1.2 + or any later version published by the Free Software Foundation; + with no Invariant Sections, no Front-Cover Texts, and no Back-Cover + Texts. A copy of the license is included in the section entitled ``GNU + Free Documentation License''. + + If you have Invariant Sections, Front-Cover Texts and Back-Cover +Texts, replace the "with...Texts." line with this: + + with the Invariant Sections being LIST THEIR TITLES, with + the Front-Cover Texts being LIST, and with the Back-Cover Texts + being LIST. 
+ + If you have Invariant Sections without Cover Texts, or some other +combination of the three, merge those two alternatives to suit the +situation. + + If your document contains nontrivial examples of program code, we +recommend releasing these examples in parallel under your choice of +free software license, such as the GNU General Public License, to +permit their use in free software. + + +File: user-guide.info, Node: Index, Prev: GNU Free Documentation Licence, Up: Top + +Index +***** + + +* Menu: + +* alu (scheduler): Unify. (line 49) +* AppArmour: Troubleshooting. (line 96) +* arch: Getting GlusterFS. (line 6) +* booster: Booster. (line 6) +* commercial support: Introduction. (line 36) +* DNS round robin: Transport modules. (line 29) +* fcntl: POSIX Locks. (line 6) +* FDL, GNU Free Documentation License: GNU Free Documentation Licence. + (line 6) +* fixed-id (translator): Fixed ID. (line 6) +* GlusterFS client: Client. (line 6) +* GlusterFS mailing list: Introduction. (line 28) +* GlusterFS server: Server. (line 6) +* infiniband transport: Transport modules. (line 58) +* InfiniBand, installation: Pre requisites. (line 51) +* io-cache (translator): IO Cache. (line 6) +* io-threads (translator): IO Threads. (line 6) +* IRC channel, #gluster: Introduction. (line 31) +* libibverbs: Pre requisites. (line 51) +* namespace: Unify. (line 207) +* nufa (scheduler): Unify. (line 175) +* OpenSuSE: Troubleshooting. (line 96) +* posix-locks (translator): POSIX Locks. (line 6) +* random (scheduler): Unify. (line 159) +* read-ahead (translator): Read Ahead. (line 6) +* record locking: POSIX Locks. (line 6) +* Redhat Enterprise Linux: Troubleshooting. (line 78) +* Replicate: Replicate. (line 6) +* rot-13 (translator): ROT-13. (line 6) +* rr (scheduler): Unify. (line 138) +* scheduler (unify): Unify. (line 6) +* self heal (replicate): Replicate. (line 46) +* self heal (unify): Unify. (line 223) +* stripe (translator): Stripe. (line 6) +* trace (translator): Trace. 
(line 6) +* unify (translator): Unify. (line 6) +* unify invariants: Unify. (line 16) +* write-behind (translator): Write Behind. (line 6) +* Gluster, Inc.: Introduction. (line 36) + + + +Tag Table: +Node: Top704 +Node: Acknowledgements2304 +Node: Introduction3214 +Node: Installation and Invocation4649 +Node: Pre requisites4933 +Node: Getting GlusterFS7023 +Ref: Getting GlusterFS-Footnote-17809 +Node: Building7857 +Node: Running GlusterFS9559 +Node: Server9770 +Node: Client11358 +Node: A Tutorial Introduction13564 +Node: Concepts17101 +Node: Filesystems in Userspace17316 +Node: Translator18457 +Node: Volume specification file21160 +Node: Translators23632 +Node: Storage Translators24201 +Ref: Storage Translators-Footnote-125008 +Node: POSIX25142 +Node: BDB25765 +Node: Client and Server Translators26822 +Node: Transport modules27298 +Node: Client protocol31445 +Node: Server protocol32384 +Node: Clustering Translators33373 +Node: Unify34260 +Ref: Unify-Footnote-143859 +Node: Replicate43951 +Node: Stripe49006 +Node: Performance Translators50164 +Node: Read Ahead50438 +Node: Write Behind52170 +Node: IO Threads53579 +Node: IO Cache54367 +Node: Booster55691 +Node: Features Translators57105 +Node: POSIX Locks57333 +Node: Fixed ID58650 +Node: Miscellaneous Translators59136 +Node: ROT-1359334 +Node: Trace60013 +Node: Usage Scenarios61282 +Ref: Usage Scenarios-Footnote-167215 +Node: Troubleshooting67290 +Node: GNU Free Documentation Licence73638 +Node: Index96087 + +End Tag Table diff --git a/doc/legacy/user-guide.pdf b/doc/legacy/user-guide.pdf Binary files differnew file mode 100644 index 00000000000..ed7bd2a9907 --- /dev/null +++ b/doc/legacy/user-guide.pdf diff --git a/doc/legacy/user-guide.texi b/doc/legacy/user-guide.texi new file mode 100644 index 00000000000..8e429853ffd --- /dev/null +++ b/doc/legacy/user-guide.texi @@ -0,0 +1,2246 @@ +\input texinfo +@setfilename user-guide.info +@settitle GlusterFS 2.0 User Guide +@afourpaper + +@direntry +* GlusterFS: 
(user-guide). GlusterFS distributed filesystem user guide +@end direntry + +@copying +This is the user manual for GlusterFS 2.0. + +Copyright @copyright{} 2007-2011 @email{@b{Gluster}} , Inc. Permission is granted to +copy, distribute and/or modify this document under the terms of the +@acronym{GNU} Free Documentation License, Version 1.2 or any later +version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the +license is included in the chapter entitled ``@acronym{GNU} Free +Documentation License''. +@end copying + +@titlepage +@title GlusterFS 2.0 User Guide [DRAFT] +@subtitle January 15, 2008 +@author http://gluster.org/core-team.php +@author @email{@b{Gluster}} +@page +@vskip 0pt plus 1filll +@insertcopying +@end titlepage + +@c Info stuff +@ifnottex +@node Top +@top GlusterFS 2.0 User Guide + +@insertcopying +@menu +* Acknowledgements:: +* Introduction:: +* Installation and Invocation:: +* Concepts:: +* Translators:: +* Usage Scenarios:: +* Troubleshooting:: +* GNU Free Documentation Licence:: +* Index:: + +@detailmenu + --- The Detailed Node Listing --- + +Installation and Invocation + +* Pre requisites:: +* Getting GlusterFS:: +* Building:: +* Running GlusterFS:: +* A Tutorial Introduction:: + +Running GlusterFS + +* Server:: +* Client:: + +Concepts + +* Filesystems in Userspace:: +* Translator:: +* Volume specification file:: + +Translators + +* Storage Translators:: +* Client and Server Translators:: +* Clustering Translators:: +* Performance Translators:: +* Features Translators:: + +Storage Translators + +* POSIX:: + +Client and Server Translators + +* Transport modules:: +* Client protocol:: +* Server protocol:: + +Clustering Translators + +* Unify:: +* Replicate:: +* Stripe:: + +Performance Translators + +* Read Ahead:: +* Write Behind:: +* IO Threads:: +* IO Cache:: + +Features Translators + +* POSIX Locks:: +* Fixed ID:: + +Miscellaneous Translators + +* ROT-13:: +* 
Trace:: + +@end detailmenu +@end menu + +@end ifnottex +@c Info stuff end + +@contents + +@node Acknowledgements +@unnumbered Acknowledgements +GlusterFS continues to be a wonderful and enriching experience for all +of us involved. + +GlusterFS development would not have been possible at this pace if +not for our enthusiastic users. People from around the world have +helped us with bug reports, performance numbers, and feature suggestions. +A huge thanks to them all. + +Matthew Paine - for RPMs & general enthu + +Leonardo Rodrigues de Mello - for DEBs + +Julian Perez & Adam D'Auria - for multi-server tutorial + +Paul England - for HA spec + +Brent Nelson - for many bug reports + +Jacques Mattheij - for Europe mirror. + +Patrick Negri - for TCP non-blocking connect. +@flushright +http://gluster.org/core-team.php (@email{list-hacking@@gluster.com}) +@email{@b{Gluster}} +@end flushright + +@node Introduction +@chapter Introduction + +GlusterFS is a distributed filesystem. It works at the file level, +not block level. + +A network filesystem is one which allows us to access remote files. A +distributed filesystem is one that stores data on multiple machines +and makes them all appear to be a part of the same filesystem. + +Need for distributed filesystems + +@itemize @bullet +@item Scalability: A distributed filesystem allows us to store more data than what can be stored on a single machine. + +@item Redundancy: We might want to replicate crucial data on to several machines. + +@item Uniform access: One can mount a remote volume (for example your home directory) from any machine and access the same data. +@end itemize + +@section Contacting us +You can reach us through the mailing list @strong{gluster-devel} +(@email{gluster-devel@@nongnu.org}). +@cindex GlusterFS mailing list + +You can also find many of the developers on @acronym{IRC}, on the @code{#gluster} +channel on Freenode (@indicateurl{irc.freenode.net}). 
+@cindex IRC channel, #gluster + +The GlusterFS documentation wiki is also useful: @* +@indicateurl{http://gluster.org/docs/index.php/GlusterFS} + +For commercial support, you can contact @email{@b{Gluster}} at: +@cindex commercial support +@cindex Gluster, Inc. + +@display +3194 Winding Vista Common +Fremont, CA 94539 +USA. + +Phone: +1 (510) 354 6801 +Toll free: +1 (888) 813 6309 +Fax: +1 (510) 372 0604 +@end display + +You can also email us at @email{support@@gluster.com}. + +@node Installation and Invocation +@chapter Installation and Invocation + +@menu +* Pre requisites:: +* Getting GlusterFS:: +* Building:: +* Running GlusterFS:: +* A Tutorial Introduction:: +@end menu + +@node Pre requisites +@section Pre requisites + +Before installing GlusterFS make sure you have the +following components installed. + +@subsection @acronym{FUSE} +GlusterFS has now built-in support for the @acronym{FUSE} protocol. +You need a kernel with @acronym{FUSE} support to mount GlusterFS. +You do not need the @acronym{FUSE} package (library and utilities), +but be aware of the following issues: + +@itemize +@item If you want unprivileged users to be able to mount GlusterFS filesystems, +you need a recent version of the @command{fusermount} utility. You already have +it if you have @acronym{FUSE} version 2.7.0 or higher installed; if that's not +the case, one will be compiled along with GlusterFS if you pass +@command{--enable-fusermount} to the @command{configure} script. @item You +need to ensure @acronym{FUSE} support is configured properly on your system. In +details: +@itemize +@item If your kernel has @acronym{FUSE} as a loadable module, make sure it's +loaded. +@item Create @command{/dev/fuse} (major 10, minor 229) either by means of udev +rules or by hand. 
+@item Optionally, if you want runtime control over your @acronym{FUSE} mounts, +mount the fusectl auxiliary filesystem: + +@example +# mount -t fusectl none /sys/fs/fuse/connections +@end example +@end itemize + +The @acronym{FUSE} packages shipped by the various distributions usually take care +of these things, so the easiest way to get the above tasks handled is still +installing the @acronym{FUSE} package(s). +@end itemize + +To get the best performance from GlusterFS, it is recommended that you use +our patched version of the @acronym{FUSE} kernel module. See Patched FUSE for details. + +@subsection Patched FUSE + +The GlusterFS project maintains a patched version of @acronym{FUSE} meant to be used +with GlusterFS. The patches increase GlusterFS performance. It is recommended that +all users use the patched @acronym{FUSE}. + +The patched @acronym{FUSE} tarball can be downloaded from: + +@indicateurl{ftp://ftp.gluster.com/pub/gluster/glusterfs/fuse/} + +The specific changes made to @acronym{FUSE} are: + +@itemize +@item The communication channel size between @acronym{FUSE} kernel module and GlusterFS has been increased to 1MB, permitting large reads and writes to be sent in bigger chunks. + +@item The kernel's read-ahead boundary has been extended up to 1MB. + +@item Block size returned in the @command{stat()}/@command{fstat()} calls tuned to 1MB, to make cp and similar commands perform I/O using that block size. + +@item @command{flock()} locking support has been added (although some rework in GlusterFS is needed for perfect compliance). +@end itemize + +@subsection libibverbs (optional) +@cindex InfiniBand, installation +@cindex libibverbs +This is only needed if you want GlusterFS to use InfiniBand as the +interconnect mechanism between server and client. You can get it from: + +@indicateurl{http://www.openfabrics.org/downloads.htm}. + +@subsection Bison and Flex +These should be already installed on most Linux systems. 
If not, use your distribution's +normal software installation procedures to install them. Make sure you install the +relevant developer packages also. + +@node Getting GlusterFS +@section Getting GlusterFS +@cindex arch +There are many ways to get hold of GlusterFS. For a production deployment, +the recommended method is to download the latest release tarball. +Release tarballs are available at: @indicateurl{http://gluster.org/download.php}. + +If you want the bleeding edge development source, you can get it +from the Git +@footnote{@indicateurl{http://git-scm.com}} +repository. First you must install Git itself. Then +you can check out the source: + +@example +$ git clone git://git.sv.gnu.org/gluster.git glusterfs +@end example + +@node Building +@section Building +You can skip this section if you're installing from @acronym{RPM}s +or @acronym{DEB}s. + +GlusterFS uses the Autotools mechanism to build. As such, the procedure +is straight-forward. First, change into the GlusterFS source directory. + +@example +$ cd glusterfs-<version> +@end example + +If you checked out the source from the Git repository, you'll need +to run @command{./autogen.sh} first. Note that you'll need to have +Autoconf and Automake installed for this. + +Run @command{configure}. + +@example +$ ./configure +@end example + +The configure script accepts the following options: + +@cartouche +@table @code + +@item --disable-ibverbs +Disable the InfiniBand transport mechanism. + +@item --disable-fuse-client +Disable the @acronym{FUSE} client. + +@item --disable-server +Disable building of the GlusterFS server. + +@item --disable-bdb +Disable building of Berkeley DB based storage translator. + +@item --disable-mod_glusterfs +Disable building of Apache/lighttpd glusterfs plugins. + +@item --disable-epoll +Use poll instead of epoll. 
+ +@item --disable-libglusterfsclient +Disable building of libglusterfsclient + +@item --enable-fusermount +Build fusermount + +@end table +@end cartouche + +Build and install GlusterFS. + +@example +# make install +@end example + +The binaries (@command{glusterfsd} and @command{glusterfs}) will be by +default installed in @command{/usr/local/sbin/}. Translator, +scheduler, and transport shared libraries will be installed in +@command{/usr/local/lib/glusterfs/<version>/}. Sample volume +specification files will be in @command{/usr/local/etc/glusterfs/}. +This document itself can be found in +@command{/usr/local/share/doc/glusterfs/}. If you passed the @command{--prefix} +argument to the configure script, then replace @command{/usr/local} in the preceding +paths with the prefix. + +@node Running GlusterFS +@section Running GlusterFS + +@menu +* Server:: +* Client:: +@end menu + +@node Server +@subsection Server +@cindex GlusterFS server + +The GlusterFS server is necessary to export storage volumes to remote clients +(See @ref{Server protocol} for more info). This section documents the invocation +of the GlusterFS server program and all the command-line options accepted by it. + +@cartouche +@table @code +Basic Options +@item -f, --volfile=<path> + Use the volume file as the volume specification. + +@item -s, --volfile-server=<hostname> + Server to get volume file from. This option overrides --volfile option. + +@item -l, --log-file=<path> + Specify the path for the log file. + +@item -L, --log-level=<level> + Set the log level for the server. Log level should be one of @acronym{DEBUG}, +@acronym{WARNING}, @acronym{ERROR}, @acronym{CRITICAL}, or @acronym{NONE}. + +Advanced Options +@item --debug + Run in debug mode. This option sets --no-daemon, --log-level to DEBUG and + --log-file to console. + +@item -N, --no-daemon + Run glusterfsd as a foreground process. + +@item -p, --pid-file=<path> + Path for the @acronym{PID} file. 
+ +@item --volfile-id=<key> + 'key' of the volfile to be fetched from server. + +@item --volfile-server-port=<port-number> + Listening port number of volfile server. + +@item --volfile-server-transport=[tcp|ib-verbs] + Transport type to get volfile from server. [default: @command{tcp}] + +@item --xlator-options=<volume-name.option=value> + Add/override a translator option for a volume with specified value. + +Miscellaneous Options +@item -?, --help + Show this help text. + +@item --usage + Display a short usage message. + +@item -V, --version + Show version information. +@end table +@end cartouche + +@node Client +@subsection Client +@cindex GlusterFS client + +The GlusterFS client process is necessary to access remote storage volumes and +mount them locally using @acronym{FUSE}. This section documents the invocation of the +client process and all its command-line arguments. + +@example + # glusterfs [options] <mountpoint> +@end example + +The @command{mountpoint} is the directory where you want the GlusterFS +filesystem to appear. Example: + +@example + # glusterfs -f /usr/local/etc/glusterfs-client.vol /mnt +@end example + +The command-line options are detailed below. + +@tex +\vfill +@end tex +@page + +@cartouche +@table @code + +Basic Options +@item -f, --volfile=<path> + Use the volume file as the volume specification. + +@item -s, --volfile-server=<hostname> + Server to get volume file from. This option overrides --volfile option. + +@item -l, --log-file=<path> + Specify the path for the log file. + +@item -L, --log-level=<level> + Set the log level for the server. Log level should be one of @acronym{DEBUG}, +@acronym{WARNING}, @acronym{ERROR}, @acronym{CRITICAL}, or @acronym{NONE}. + +Advanced Options +@item --debug + Run in debug mode. This option sets --no-daemon, --log-level to DEBUG and + --log-file to console. + +@item -N, --no-daemon + Run @command{glusterfs} as a foreground process. + +@item -p, --pid-file=<path> + Path for the @acronym{PID} file. 
+ +@item --volfile-id=<key> + 'key' of the volfile to be fetched from server. + +@item --volfile-server-port=<port-number> + Listening port number of volfile server. + +@item --volfile-server-transport=[tcp|ib-verbs] + Transport type to get volfile from server. [default: @command{tcp}] + +@item --xlator-options=<volume-name.option=value> + Add/override a translator option for a volume with specified value. + +@item --volume-name=<volume name> + Volume name in client spec to use. Defaults to the root volume. + +@acronym{FUSE} Options +@item --attribute-timeout=<n> + Attribute timeout for inodes in the kernel, in seconds. Defaults to 1 second. + +@item --disable-direct-io-mode + Disable direct @acronym{I/O} mode in @acronym{FUSE} kernel module. This is set + automatically if kernel supports big writes (>= 2.6.26). + +@item -e, --entry-timeout=<n> + Entry timeout for directory entries in the kernel, in seconds. + Defaults to 1 second. + +Miscellaneous Options +@item -?, --help + Show this help information. + +@item -V, --version + Show version information. +@end table +@end cartouche + +@node A Tutorial Introduction +@section A Tutorial Introduction + +This section will show you how to quickly get GlusterFS up and running. We'll +configure GlusterFS as a simple network filesystem, with one server and one client. +In this mode of usage, GlusterFS can serve as a replacement for NFS. + +We'll make use of two machines; call them @emph{server} and +@emph{client} (If you don't want to set up two machines, just run +everything that follows on the same machine). In the examples that +follow, the shell prompts will use these names to clarify the machine +on which the command is being run. For example, a command that should +be run on the server will be shown with the prompt: + +@example +[root@@server]# +@end example + +Our goal is to make a directory on the @emph{server} (say, @command{/export}) +accessible to the @emph{client}. 
+ +First of all, get GlusterFS installed on both the machines, as described in the +previous sections. Make sure you have the @acronym{FUSE} kernel module loaded. You +can ensure this by running: + +@example +[root@@server]# modprobe fuse +@end example + +Before we can run the GlusterFS client or server programs, we need to write +two files called @emph{volume specifications} (equivalently referred to as @emph{volfiles}). +The volfile describes the @emph{translator tree} on a node. The next chapter will +explain the concepts of `translator' and `volume specification' in detail. For now, +just assume that the volfile is like an NFS @command{/etc/exports} file. + +On the server, create a text file somewhere (we'll assume the path +@command{/tmp/glusterfsd.vol}) with the following contents. + +@cartouche +@example +volume colon-o + type storage/posix + option directory /export +end-volume + +volume server + type protocol/server + subvolumes colon-o + option transport-type tcp + option auth.addr.colon-o.allow * +end-volume +@end example +@end cartouche + +A brief explanation of the file's contents. The first section defines a storage +volume, named ``colon-o'' (the volume names are arbitrary), which exports the +@command{/export} directory. The second section defines options for the translator +which will make the storage volume accessible remotely. It specifies @command{colon-o} as +a subvolume. This defines the @emph{translator tree}, about which more will be said +in the next chapter. The two options specify that the @acronym{TCP} protocol is to be +used (as opposed to InfiniBand, for example), and that access to the storage volume +is to be provided to clients with any @acronym{IP} address at all. If you wanted to +restrict access to this server to only your subnet for example, you'd specify +something like @command{192.168.1.*} in the second option line. 
+ +On the client machine, create the following text file (again, we'll assume +the path to be @command{/tmp/glusterfs-client.vol}). Replace +@emph{server-ip-address} with the @acronym{IP} address of your server machine. If you +are doing all this on a single machine, use @command{127.0.0.1}. + +@cartouche +@example +volume client + type protocol/client + option transport-type tcp + option remote-host @emph{server-ip-address} + option remote-subvolume colon-o +end-volume +@end example +@end cartouche + +Now we need to start both the server and client programs. To start the server: + +@example +[root@@server]# glusterfsd -f /tmp/glusterfsd.vol +@end example + +To start the client: + +@example +[root@@client]# glusterfs -f /tmp/glusterfs-client.vol /mnt/glusterfs +@end example + +You should now be able to see the files under the server's @command{/export} directory +in the @command{/mnt/glusterfs} directory on the client. That's it; GlusterFS is now +working as a network file system. + +@node Concepts +@chapter Concepts + +@menu +* Filesystems in Userspace:: +* Translator:: +* Volume specification file:: +@end menu + +@node Filesystems in Userspace +@section Filesystems in Userspace + +A filesystem is usually implemented in kernel space. Kernel space +development is much harder than userspace development. @acronym{FUSE} +is a kernel module/library that allows us to write a filesystem +completely in userspace. + +@acronym{FUSE} consists of a kernel module which interacts with the userspace +implementation using a device file @code{/dev/fuse}. When a process +makes a syscall on a @acronym{FUSE} filesystem, @acronym{VFS} hands the request to the +@acronym{FUSE} module, which writes the request to @code{/dev/fuse}. The +userspace implementation polls @code{/dev/fuse}, and when a request arrives, +processes it and writes the result back to @code{/dev/fuse}. The kernel then +reads from the device file and returns the result to the user process. 
+ +In case of GlusterFS, the userspace program is the GlusterFS client. +The control flow is shown in the diagram below. The GlusterFS client +services the request by sending it to the server, which in turn +hands it to the local @acronym{POSIX} filesystem. + +@center @image{fuse,44pc,,,.pdf} +@center Fig 1. Control flow in GlusterFS + +@node Translator +@section Translator + +The @emph{translator} is the most important concept in GlusterFS. In +fact, GlusterFS is nothing but a collection of translators working +together, forming a translator @emph{tree}. + +The idea of a translator is perhaps best understood using an +analogy. Consider the @acronym{VFS} in the Linux kernel. The +@acronym{VFS} abstracts the various filesystem implementations (such +as @acronym{EXT3}, ReiserFS, @acronym{XFS}, etc.) supported by the +kernel. When an application calls the kernel to perform an operation +on a file, the kernel passes the request on to the appropriate +filesystem implementation. + +For example, let's say there are two partitions on a Linux machine: +@command{/}, which is an @acronym{EXT3} partition, and @command{/usr}, +which is a ReiserFS partition. Now if an application wants to open a +file called, say, @command{/etc/fstab}, then the kernel will +internally pass the request to the @acronym{EXT3} implementation. If +on the other hand, an application wants to read a file called +@command{/usr/src/linux/CREDITS}, then the kernel will call upon the +ReiserFS implementation to do the job. + +The ``filesystem implementation'' objects are analogous to GlusterFS +translators. A GlusterFS translator implements all the filesystem +operations. Whereas in @acronym{VFS} there is a two-level tree (with +the kernel at the root and all the filesystem implementation as its +children), in GlusterFS there exists a more elaborate tree structure. + +We can now define translators more precisely. 
A GlusterFS translator +is a shared object (@command{.so}) that implements every filesystem +call. GlusterFS translators can be arranged in an arbitrary tree +structure (subject to constraints imposed by the translators). When +GlusterFS receives a filesystem call, it passes it on to the +translator at the root of the translator tree. The root translator may +in turn pass it on to any or all of its children, and so on, until the +leaf nodes are reached. The result of a filesystem call is +communicated in the reverse fashion, from the leaf nodes up to the +root node, and then on to the application. + +So what might a translator tree look like? + +@tex +\vfill +@end tex +@page + +@center @image{xlator,44pc,,,.pdf} +@center Fig 2. A sample translator tree + +The diagram depicts three servers and one GlusterFS client. It is important +to note that conceptually, the translator tree spans machine boundaries. +Thus, the client machine in the diagram, @command{10.0.0.1}, can access +the aggregated storage of the filesystems on the server machines @command{10.0.0.2}, +@command{10.0.0.3}, and @command{10.0.0.4}. The translator diagram will make more +sense once you've read the next chapter and understood the functions of the +various translators. + +@node Volume specification file +@section Volume specification file +The volume specification file describes the translator tree for both the +server and client programs. + +A volume specification file is a sequence of volume definitions. +The syntax of a volume definition is explained below: + +@cartouche +@example +@strong{volume} @emph{volume-name} + @strong{type} @emph{translator-name} + @strong{option} @emph{option-name} @emph{option-value} + @dots{} + @strong{subvolumes} @emph{subvolume1} @emph{subvolume2} @dots{} +@strong{end-volume} +@end example + +@dots{} +@end cartouche + +@table @asis +@item @emph{volume-name} + An identifier for the volume. 
This is just a human-readable name, +and can contain any alphanumeric character. For instance, ``storage-1'', ``colon-o'', +or ``forty-two''. + +@item @emph{translator-name} + Name of one of the available translators. Example: @command{protocol/client}, +@command{cluster/unify}. + +@item @emph{option-name} + Name of a valid option for the translator. + +@item @emph{option-value} + Value for the option. Everything following the ``option'' keyword to the end of the +line is considered the value; it is up to the translator to parse it. + +@item @emph{subvolume1}, @emph{subvolume2}, @dots{} + Volume names of sub-volumes. The sub-volumes must already have been defined earlier +in the file. +@end table + +There are a few rules you must follow when writing a volume specification file: + +@itemize +@item Everything following a `@command{#}' is considered a comment and is ignored. Blank lines are also ignored. +@item All names and keywords are case-sensitive. +@item The order of options inside a volume definition does not matter. +@item An option value may not span multiple lines. +@item If an option is not specified, it will assume its default value. +@item A sub-volume must have already been defined before it can be referenced. This means you have to write the specification file ``bottom-up'', starting from the leaf nodes of the translator tree and moving up to the root. 
+@end itemize + +A simple example volume specification file is shown below: + +@cartouche +@example +# This is a comment line +volume client + type protocol/client + option transport-type tcp + option remote-host localhost # Also a comment + option remote-subvolume brick +# The subvolumes line may be absent +end-volume + +volume iot + type performance/io-threads + option thread-count 4 + subvolumes client +end-volume + +volume wb + type performance/write-behind + subvolumes iot +end-volume +@end example +@end cartouche + +@node Translators +@chapter Translators + +@menu +* Storage Translators:: +* Client and Server Translators:: +* Clustering Translators:: +* Performance Translators:: +* Features Translators:: +* Miscellaneous Translators:: +@end menu + +This chapter documents all the available GlusterFS translators in detail. +Each translator section will show its name (for example, @command{cluster/unify}), +briefly describe its purpose and workings, and list every option accepted by +that translator and their meaning. + +@node Storage Translators +@section Storage Translators + +The storage translators form the ``backend'' for GlusterFS. Currently, +the only available storage translator is the @acronym{POSIX} +translator, which stores files on a normal @acronym{POSIX} +filesystem. A pleasant consequence of this is that your data will +still be accessible if GlusterFS crashes or cannot be started. + +Other storage backends are planned for the future. One of the possibilities is an +Amazon S3 translator. Amazon S3 is an unlimited online storage service accessible +through a web services @acronym{API}. The S3 translator will allow you to access +the storage as a normal @acronym{POSIX} filesystem. 
+@footnote{Some more discussion about this can be found at: + +http://developer.amazonwebservices.com/connect/message.jspa?messageID=52873} + +@menu +* POSIX:: +* BDB:: +@end menu + +@node POSIX +@subsection POSIX +@example +type storage/posix +@end example + +The @command{posix} translator uses a normal @acronym{POSIX} +filesystem as its ``backend'' to actually store files and +directories. This can be any filesystem that supports extended +attributes (@acronym{EXT3}, ReiserFS, @acronym{XFS}, ...). Extended +attributes are used by some translators to store metadata, for +example, by the replicate and stripe translators. See +@ref{Replicate} and @ref{Stripe}, respectively for details. + +@cartouche +@table @code +@item directory <path> +The directory on the local filesystem which is to be used for storage. +@end table +@end cartouche + +@node BDB +@subsection BDB +@example +type storage/bdb +@end example + +The @command{BDB} translator uses a @acronym{Berkeley DB} database as its +``backend'' to actually store files as key-value pair in the database and +directories as regular @acronym{POSIX} directories. Note that @acronym{BDB} +does not provide extended attribute support for regular files. Do not use +@acronym{BDB} as storage translator while using any translator that demands +extended attributes on ``backend''. + +@cartouche +@table @code +@item directory <path> +The directory on the local filesystem which is to be used for storage. +@item mode [cache|persistent] (cache) +When @acronym{BDB} is run in @command{cache} mode, recovery of back-end is not completely +guaranteed. @command{persistent} guarantees that @acronym{BDB} can recover back-end from +@acronym{Berkeley DB} even if GlusterFS crashes. +@item errfile <path> +The path of the file to be used as @command{errfile} for @acronym{Berkeley DB} to report +detailed error messages, if any. Note that all the contents of this file will be written +by @acronym{Berkeley DB}, not GlusterFS. 
+@item logdir <path> + + +@end table +@end cartouche + +@node Client and Server Translators, Clustering Translators, Storage Translators, Translators +@section Client and Server Translators + +The client and server translator enable GlusterFS to export a +translator tree over the network or access a remote GlusterFS +server. These two translators implement GlusterFS's network protocol. + +@menu +* Transport modules:: +* Client protocol:: +* Server protocol:: +@end menu + +@node Transport modules +@subsection Transport modules +The client and server translators are capable of using any of the +pluggable transport modules. Currently available transport modules are +@command{tcp}, which uses a @acronym{TCP} connection between client +and server to communicate; @command{ib-sdp}, which uses a +@acronym{TCP} connection over InfiniBand, and @command{ibverbs}, which +uses high-speed InfiniBand connections. + +Each transport module comes in two different versions, one to be used on +the server side and the other on the client side. + +@subsubsection TCP + +The @acronym{TCP} transport module uses a @acronym{TCP/IP} connection between +the server and the client. + +@example + option transport-type tcp +@end example + +The @acronym{TCP} client module accepts the following options: + +@cartouche +@table @code +@item non-blocking-connect [no|off|on|yes] (on) +Whether to make the connection attempt asynchronous. +@item remote-port <n> (24007) +Server port to connect to. +@cindex DNS round robin +@item remote-host <hostname> * +Hostname or @acronym{IP} address of the server. If the host name resolves to +multiple IP addresses, all of them will be tried in a round-robin fashion. This +feature can be used to implement fail-over. +@end table +@end cartouche + +The @acronym{TCP} server module accepts the following options: + +@cartouche +@table @code +@item bind-address <address> (0.0.0.0) +The local interface on which the server should listen to requests. 
Default is to +listen on all interfaces. +@item listen-port <n> (24007) +The local port to listen on. +@end table +@end cartouche + +@subsubsection IB-SDP +@example + option transport-type ib-sdp +@end example + +The kernel implements a socket interface for InfiniBand hardware. SDP is layered over ib-verbs. +This module accepts the same options as @command{tcp}. + +@subsubsection ibverbs + +@example + option transport-type ib-verbs +@end example + +@cindex infiniband transport + +InfiniBand is a scalable switched fabric interconnect mechanism +primarily used in high-performance computing. InfiniBand can deliver +data throughput of the order of 10 Gbit/s, with latencies of 4-5 ms. + +The @command{ib-verbs} transport accesses the InfiniBand hardware through +the ``verbs'' @acronym{API}, which is the lowest level of software access possible +and which gives the highest performance. On InfiniBand hardware, it is always +best to use @command{ib-verbs}. Use @command{ib-sdp} only if you cannot get +@command{ib-verbs} working for some reason. + +The @command{ib-verbs} client module accepts the following options: + +@cartouche +@table @code +@item non-blocking-connect [no|off|on|yes] (on) +Whether to make the connection attempt asynchronous. +@item remote-port <n> (24007) +Server port to connect to. +@cindex DNS round robin +@item remote-host <hostname> * +Hostname or @acronym{IP} address of the server. If the host name resolves to +multiple IP addresses, all of them will be tried in a round-robin fashion. This +feature can be used to implement fail-over. +@end table +@end cartouche + +The @command{ib-verbs} server module accepts the following options: + +@cartouche +@table @code +@item bind-address <address> (0.0.0.0) +The local interface on which the server should listen to requests. Default is to +listen on all interfaces. +@item listen-port <n> (24007) +The local port to listen on. 
+@end table +@end cartouche + +The following options are common to both the client and server modules: + +If you are familiar with InfiniBand jargon, +the mode used by GlusterFS is ``reliable connection-oriented channel transfer''. + +@cartouche +@table @code +@item ib-verbs-work-request-send-count <n> (64) +Length of the send queue in datagrams. [Reason to increase/decrease?] + +@item ib-verbs-work-request-recv-count <n> (64) +Length of the receive queue in datagrams. [Reason to increase/decrease?] + +@item ib-verbs-work-request-send-size <size> (128KB) +Size of each datagram that is sent. [Reason to increase/decrease?] + +@item ib-verbs-work-request-recv-size <size> (128KB) +Size of each datagram that is received. [Reason to increase/decrease?] + +@item ib-verbs-port <n> (1) +Port number for ib-verbs. + +@item ib-verbs-mtu [256|512|1024|2048|4096] (2048) +The Maximum Transmission Unit [Reason to increase/decrease?] + +@item ib-verbs-device-name <device-name> (first device in the list) +InfiniBand device to be used. +@end table +@end cartouche + +For maximum performance, you should ensure that the send/receive counts on both +the client and server are the same. + +ib-verbs is preferred over ib-sdp. + +@node Client protocol +@subsection Client +@example +type protocol/client +@end example + +The client translator enables the GlusterFS client to access a remote server's +translator tree. + +@cartouche +@table @code + +@item transport-type [tcp,ib-sdp,ib-verbs] (tcp) +The transport type to use. You should use the client versions of all the +transport modules (@command{tcp}, @command{ib-sdp}, +@command{ib-verbs}). +@item remote-subvolume <volume_name> * +The name of the volume on the remote host to attach to. Note that +this is @emph{not} the name of the @command{protocol/server} volume on the +server. It should be any volume under the server. +@item transport-timeout <n> (120 seconds) +Inactivity timeout. 
If a reply is expected and no activity takes place +on the connection within this time, the transport connection will be +broken, and a new connection will be attempted. +@end table +@end cartouche + +@node Server protocol +@subsection Server +@example +type protocol/server +@end example + +The server translator exports a translator tree and makes it accessible to +remote GlusterFS clients. + +@cartouche +@table @code +@item client-volume-filename <path> (<CONFDIR>/glusterfs-client.vol) +The volume specification file to use for the client. This is the file the +client will receive when it is invoked with the @command{--server} option +(@ref{Client}). + +@item transport-type [tcp,ib-verbs,ib-sdp] (tcp) +The transport to use. You should use the server versions of all the transport +modules (@command{tcp}, @command{ib-sdp}, @command{ib-verbs}). + +@item auth.addr.<volume name>.allow <IP address wildcard pattern> +IP addresses of the clients that are allowed to attach to the specified volume. +This can be a wildcard. For example, a wildcard of the form @command{192.168.*.*} +allows any host in the @command{192.168.x.x} subnet to connect to the server. + +@end table +@end cartouche + +@node Clustering Translators +@section Clustering Translators + +The clustering translators are the most important GlusterFS +translators, since it is these that make GlusterFS a cluster +filesystem. These translators together enable GlusterFS to access an +arbitrarily large amount of storage, and provide @acronym{RAID}-like +redundancy and distribution over the entire cluster. + +There are three clustering translators: @strong{unify}, @strong{replicate}, +and @strong{stripe}. The unify translator aggregates storage from +many server nodes. The replicate translator provides file replication. The stripe +translator allows a file to be spread across many server nodes. The following sections +look at each of these translators in detail. 
+ +@menu +* Unify:: +* Replicate:: +* Stripe:: +@end menu + +@node Unify +@subsection Unify +@cindex unify (translator) +@cindex scheduler (unify) +@example +type cluster/unify +@end example + +The unify translator presents a `unified' view of all its sub-volumes. That is, +it makes the union of all its sub-volumes appear as a single volume. It is the +unify translator that gives GlusterFS the ability to access an arbitrarily +large amount of storage. + +For unify to work correctly, certain invariants need to be maintained across +the entire network. These are: + +@cindex unify invariants +@itemize +@item The directory structure of all the sub-volumes must be identical. +@item A particular file can exist on only one of the sub-volumes. Phrasing it in another way, a pathname such as @command{/home/calvin/homework.txt} is unique across the entire cluster. +@end itemize + +@tex +\vfill +@end tex +@page + +@center @image{unify,44pc,,,.pdf} + +Looking at the second requirement, you might wonder how one can +accomplish storing redundant copies of a file, if no file can exist +multiple times. To answer, we must remember that these invariants are +from @emph{unify's perspective}. A translator such as replicate at a lower +level in the translator tree than unify may subvert this picture. + +The first invariant might seem quite tedious to ensure. We shall see +later that this is not so, since unify's @emph{self-heal} mechanism +takes care of maintaining it. + +The second invariant implies that unify needs some way to decide which file goes where. +Unify makes use of @emph{scheduler} modules for this purpose. + +When a file needs to be created, unify's scheduler decides upon the +sub-volume to be used to store the file. There are many schedulers +available, each using a different algorithm and suitable for different +purposes. + +The various schedulers are described in detail in the sections that follow. 
+ +@subsubsection ALU +@cindex alu (scheduler) + +@example + option scheduler alu +@end example + +ALU stands for "Adaptive Least Usage". It is the most advanced +scheduler available in GlusterFS. It balances the load across volumes +taking several factors into account. It adapts itself to changing I/O +patterns according to its configuration. When properly configured, it +can eliminate the need for regular tuning of the filesystem to keep +volume load nicely balanced. + +The ALU scheduler is composed of multiple least-usage +sub-schedulers. Each sub-scheduler keeps track of a certain type of +load, for each of the sub-volumes, getting statistics from +the sub-volumes themselves. The sub-schedulers are these: + +@itemize +@item disk-usage: The used and free disk space on the volume. + +@item read-usage: The amount of reading done from this volume. + +@item write-usage: The amount of writing done to this volume. + +@item open-files-usage: The number of files currently open from this volume. + +@item disk-speed-usage: The speed at which the disks are spinning. This is a constant value and therefore not very useful. +@end itemize + +The ALU scheduler needs to know which of these sub-schedulers to use, +and in which order to evaluate them. This is done through the +@command{option alu.order} configuration directive. + +Each sub-scheduler needs to know two things: when to kick in (the +entry-threshold), and how long to stay in control (the +exit-threshold). For example: when unifying three disks of 100GB, +keeping an exact balance of disk-usage is not necessary. Instead, there +could be a 1GB margin, which can be used to nicely balance other +factors, such as read-usage. The disk-usage scheduler can be told to +kick in only when a certain threshold of discrepancy is passed, such +as 1GB. When it assumes control under this condition, it will write +all subsequent data to the least-used volume. 
If it is doing so, it is +unwise to stop right after the values are below the entry-threshold +again, since that would make it very likely that the situation will +occur again very soon. Such a situation would cause the ALU to spend +most of its time disk-usage scheduling, which is unfair to the other +sub-schedulers. The exit-threshold therefore defines the amount of +data that needs to be written to the least-used disk, before control +is relinquished again. + +In addition to the sub-schedulers, the ALU scheduler also has "limits" +options. These can stop the creation of new files on a volume once +values drop below a certain threshold. For example, setting +@command{option alu.limits.min-free-disk 5GB} will stop the scheduling +of files to volumes that have less than 5GB of free disk space, +leaving the files on that disk some room to grow. + +The actual values you assign to the thresholds for sub-schedulers and +limits depend on your situation. If you have fast-growing files, +you'll want to stop file-creation on a disk much earlier than when +hardly any of your files are growing. If you care less about +disk-usage balance than about read-usage balance, you'll want a bigger +disk-usage scheduler entry-threshold and a smaller read-usage +scheduler entry-threshold. + +For thresholds defining a size, values specifying "KB", "MB" and "GB" +are allowed. For example: @command{option alu.limits.min-free-disk 5GB}. 
+ +@cartouche +@table @code +@item alu.order <order> * ("disk-usage:write-usage:read-usage:open-files-usage:disk-speed") +@item alu.disk-usage.entry-threshold <size> (1GB) +@item alu.disk-usage.exit-threshold <size> (512MB) +@item alu.write-usage.entry-threshold <%> (25) +@item alu.write-usage.exit-threshold <%> (5) +@item alu.read-usage.entry-threshold <%> (25) +@item alu.read-usage.exit-threshold <%> (5) +@item alu.open-files-usage.entry-threshold <n> (1000) +@item alu.open-files-usage.exit-threshold <n> (100) +@item alu.limits.min-free-disk <%> +@item alu.limits.max-open-files <n> +@end table +@end cartouche + +@subsubsection Round Robin (RR) +@cindex rr (scheduler) + +@example + option scheduler rr +@end example + +Round-Robin (RR) scheduler creates files in a round-robin +fashion. Each client will have its own round-robin loop. When your +files are mostly similar in size and I/O access pattern, this +scheduler is a good choice. RR scheduler checks for free disk space +on the server before scheduling, so you can know when to add +another server node. The default value of min-free-disk is 5% and is +checked on file creation calls, with at least 10 seconds (by default) +elapsing between two checks. + +Options: +@cartouche +@table @code +@item rr.limits.min-free-disk <%> (5) +Minimum free disk space a node must have for RR to schedule a file to it. +@item rr.refresh-interval <t> (10 seconds) +Time between two successive free disk space checks. +@end table +@end cartouche + +@subsubsection Random +@cindex random (scheduler) + +@example + option scheduler random +@end example + +The random scheduler schedules file creation randomly among its child nodes. +Like the round-robin scheduler, it also checks for a minimum amount of free disk +space before scheduling a file to a node. + +@cartouche +@table @code +@item random.limits.min-free-disk <%> (5) +Minimum free disk space a node must have for random to schedule a file to it. 
+@item random.refresh-interval <t> (10 seconds) +Time between two successive free disk space checks. +@end table +@end cartouche + +@subsubsection NUFA +@cindex nufa (scheduler) + +@example + option scheduler nufa +@end example + +It is common in many GlusterFS computing environments for all deployed +machines to act as both servers and clients. For example, a +research lab may have 40 workstations each with its own storage. All +of these workstations might act as servers exporting a volume as well +as clients accessing the entire cluster's storage. In such a +situation, it makes sense to store locally created files on the local +workstation itself (assuming files are accessed most by the +workstation that created them). The Non-Uniform File Allocation (@acronym{NUFA}) +scheduler accomplishes that. + +@acronym{NUFA} gives the local system first priority for file creation +over other nodes. If the local volume does not have more free disk space +than a specified amount (5% by default) then @acronym{NUFA} schedules files +among the other child volumes in a round-robin fashion. + +@acronym{NUFA} is named after the similar strategy used for memory access, +@acronym{NUMA}@footnote{Non-Uniform Memory Access: +@indicateurl{http://en.wikipedia.org/wiki/Non-Uniform_Memory_Access}}. + +@cartouche +@table @code +@item nufa.limits.min-free-disk <%> (5) +Minimum disk space that must be free (local or remote) for @acronym{NUFA} to schedule a +file to it. +@item nufa.refresh-interval <t> (10 seconds) +Time between two successive free disk space checks. +@item nufa.local-volume-name <volume> +The name of the volume corresponding to the local system. This volume must be +one of the children of the unify volume. This option is mandatory. +@end table +@end cartouche + +@cindex namespace +@subsubsection Namespace +Namespace volume needed because: + - persistent inode numbers. + - file exists even when node is down. + +namespace files are simply touched. on every lookup it is checked. 
+ +@cartouche +@table @code +@item namespace <volume> * +Name of the namespace volume (which should be one of the unify volume's children). +@item self-heal [on|off] (on) +Enable/disable self-heal. Unless you know what you are doing, do not disable self-heal. +@end table +@end cartouche + +@cindex self heal (unify) +@subsubsection Self Heal + * When a 'lookup()/stat()' call is made on a directory for the first +time, a self-heal call is made, which checks for the consistency of +its child nodes. If an entry is present in storage node, but not in +namespace, that entry is created in namespace, and vice versa. There +is a writedir() API introduced which is used for the same. It also +checks for permissions, and uid/gid consistencies. + + * This check is also done when a server goes down and comes up. + + * If one starts with an empty namespace export, but has data in +storage nodes, a 'find .>/dev/null' or 'ls -lR >/dev/null' should help +to build namespace in one shot. Even otherwise, namespace is built on +demand when a file is looked up for the first time. + +NOTE: There are some issues (Kernel 'Oops' msgs) seen with fuse-2.6.3, +when someone deletes namespace in backend, when glusterfs is +running. But with fuse-2.6.5, this issue is not there. + +@node Replicate +@subsection Replicate (formerly AFR) +@cindex Replicate +@example +type cluster/replicate +@end example + +Replicate provides @acronym{RAID}-1 like functionality for +GlusterFS. Replicate replicates files and directories across the +subvolumes. Hence if Replicate has four subvolumes, there will be +four copies of all files and directories. Replicate provides +high-availability, i.e., in case one of the subvolumes goes down +(e.g., server crash, network disconnection) Replicate will still +service the requests using the redundant copies. 
+ +Replicate also provides self-heal functionality, i.e., in case the +crashed servers come up, the outdated files and directories will be +updated with the latest versions. Replicate uses extended +attributes of the backend file system to track the versioning of files +and directories and provide the self-heal feature. + +@example +volume replicate-example + type cluster/replicate + subvolumes brick1 brick2 brick3 +end-volume +@end example + +This sample configuration will replicate all directories and files on +brick1, brick2 and brick3. + +All the read operations happen from the first alive child. If all the +three sub-volumes are up, reads will be done from brick1; if brick1 is +down read will be done from brick2. In case read() was being done on +brick1 and it goes down, replicate transparently falls back to +brick2. + +The next release of GlusterFS will add the following features: +@itemize +@item Ability to specify the sub-volume from which read operations are to be done (this will help users who have one of the sub-volumes as a local storage volume). +@item Allow scheduling of read operations amongst the sub-volumes in a round-robin fashion. +@end itemize + +The order of the subvolumes list should be same across all the 'replicate's as +they will be used for locking purposes. + +@cindex self heal (replicate) +@subsubsection Self Heal +Replicate has self-heal feature, which updates the outdated file and +directory copies by the most recent versions. For example consider the +following config: + +@example +volume replicate-example + type cluster/replicate + subvolumes brick1 brick2 +end-volume +@end example + +@subsubsection File self-heal + +Now if we create a file foo.txt on replicate-example, the file will be created +on brick1 and brick2. The file will have two extended attributes associated +with it in the backend filesystem. One is trusted.afr.createtime and the +other is trusted.afr.version. 
The trusted.afr.createtime xattr has the +create time (in terms of seconds since epoch) and trusted.afr.version +is a number that is incremented each time a file is modified. This increment +happens during close (in case any write was done before close). + +If brick1 goes down and we edit foo.txt, the version gets incremented. Now +the brick1 comes back up, when we open() on foo.txt replicate will check if +their versions are same. If they are not same, the outdated copy is +replaced by the latest copy and its version is updated. After the sync +the open() proceeds in the usual manner and the application calling open() +can continue on its access to the file. + +If brick1 goes down, we delete foo.txt and create a file with the same +name again i.e., foo.txt. Now brick1 comes back up, clearly there is a +chance that the version on brick1 being more than the version on brick2, +this is where createtime extended attribute helps in deciding which +the outdated copy is. Hence we need to consider both createtime and +version to decide on the latest copy. + +The version attribute is incremented during the close() call. Version +will not be incremented in case there was no write() done. In case the +fd that the close() gets was got by create() call, we also create +the createtime extended attribute. + +@subsubsection Directory self-heal + +Suppose brick1 goes down, we delete foo.txt, brick1 comes back up, now +we should not create foo.txt on brick2 but we should delete foo.txt +on brick1. We handle this situation by having the createtime and version +attribute on the directory similar to the file. When lookup() is done +on the directory, we compare the createtime/version attributes of the +copies and see which files need to be deleted and delete those files +and update the extended attributes of the outdated directory copy. 
+Each time a directory is modified (a file or a subdirectory is created +or deleted inside the directory) and one of the subvols is down, we +increment the directory's version. + +lookup() is a call initiated by the kernel on a file or directory +just before any access to that file or directory. In glusterfs, by +default, lookup() will not be called in case it was called in the +past one second on that particular file or directory. + +The extended attributes can be seen in the backend filesystem using +the @command{getfattr} command. (@command{getfattr -n trusted.afr.version <file>}) + +@cartouche +@table @code +@item debug [on|off] (off) +@item self-heal [on|off] (on) +@item replicate <pattern> (*:1) +@item lock-node <child_volume> (first child is used by default) +@end table +@end cartouche + +@node Stripe +@subsection Stripe +@cindex stripe (translator) +@example +type cluster/stripe +@end example + +The stripe translator distributes the contents of a file over its +sub-volumes. It does this by creating a file equal in size to the +total size of the file on each of its sub-volumes. It then writes only +a part of the file to each sub-volume, leaving the rest of it empty. +These empty regions are called `holes' in Unix terminology. The holes +do not consume any disk space. + +The diagram below makes this clear. + +@center @image{stripe,44pc,,,.pdf} + +You can configure stripe so that only filenames matching a pattern +are striped. You can also configure the size of the data to be stored +on each sub-volume. + +@cartouche +@table @code +@item block-size <pattern>:<size> (*:0 no striping) +Distribute files matching @command{<pattern>} over the sub-volumes, +storing at least @command{<size>} on each sub-volume. For example, + +@example + option block-size *.mpg:1M +@end example + +distributes all files ending in @command{.mpg}, storing at least 1 MB on +each sub-volume. 
+ +Any number of @command{block-size} option lines may be present, specifying +different sizes for different file name patterns. +@end table +@end cartouche + +@node Performance Translators +@section Performance Translators + +@menu +* Read Ahead:: +* Write Behind:: +* IO Threads:: +* IO Cache:: +* Booster:: +@end menu + +@node Read Ahead +@subsection Read Ahead +@cindex read-ahead (translator) +@example +type performance/read-ahead +@end example + +The read-ahead translator pre-fetches data in advance on every read. +This benefits applications that mostly process files in sequential order, +since the next block of data will already be available by the time the +application is done with the current one. + +Additionally, the read-ahead translator also behaves as a read-aggregator. +Many small read operations are combined and issued as fewer, larger read +requests to the server. + +Read-ahead deals in ``pages'' as the unit of data fetched. The page size +is configurable, as is the ``page count'', which is the number of pages +that are pre-fetched. + +Read-ahead is best used with InfiniBand (using the ib-verbs transport). +On FastEthernet and Gigabit Ethernet networks, +GlusterFS can achieve the link-maximum throughput even without +read-ahead, making it quite superfluous. + +Note that read-ahead only happens if the reads are perfectly +sequential. If your application accesses data in a random fashion, +using read-ahead might actually lead to a performance loss, since +read-ahead will pointlessly fetch pages which won't be used by the +application. + +@cartouche +Options: +@table @code +@item page-size <n> (256KB) +The unit of data that is pre-fetched. +@item page-count <n> (2) +The number of pages that are pre-fetched. +@item force-atime-update [on|off|yes|no] (off|no) +Whether to force an access time (atime) update on the file on every read. 
Without +this, the atime will be slightly imprecise, as it will reflect the time when +the read-ahead translator read the data, not when the application actually read it. +@end table +@end cartouche + +@node Write Behind +@subsection Write Behind +@cindex write-behind (translator) +@example +type performance/write-behind +@end example + +The write-behind translator improves the latency of a write operation. +It does this by relegating the write operation to the background and +returning to the application even as the write is in progress. Using the +write-behind translator, successive write requests can be pipelined. +This mode of write-behind operation is best used on the client side, to +enable decreased write latency for the application. + +The write-behind translator can also aggregate write requests. If the +@command{aggregate-size} option is specified, then successive writes up to that +size are accumulated and written in a single operation. This mode of operation +is best used on the server side, as this will decrease the disk's head movement +when multiple files are being written to in parallel. + +The @command{aggregate-size} option has a default value of 128KB. Although +this works well for most users, you should always experiment with different values +to determine the one that will deliver maximum performance. This is because the +performance of write-behind depends on your interconnect, size of RAM, and the +work load. + +@cartouche +@table @code +@item aggregate-size <n> (128KB) +Amount of data to accumulate before doing a write. +@item flush-behind [on|yes|off|no] (off|no) + +@end table +@end cartouche + +@node IO Threads +@subsection IO Threads +@cindex io-threads (translator) +@example +type performance/io-threads +@end example + +The IO threads translator is intended to increase the responsiveness +of the server to metadata operations by doing file I/O (read, write) +in a background thread. 
Since the GlusterFS server is +single-threaded, using the IO threads translator can significantly +improve performance. This translator is best used on the server side, +loaded just below the server protocol translator. + +IO threads operates by handing out read and write requests to a separate thread. +The total number of threads in existence at a time is constant, and configurable. + +@cartouche +@table @code +@item thread-count <n> (1) +Number of threads to use. +@end table +@end cartouche + +@node IO Cache +@subsection IO Cache +@cindex io-cache (translator) +@example +type performance/io-cache +@end example + +The IO cache translator caches data that has been read. This is useful +if many applications read the same data multiple times, and if reads +are much more frequent than writes (for example, IO caching may be +useful in a web hosting environment, where most clients will simply +read some files and only a few will write to them). + +The IO cache translator reads data from its child in @command{page-size} chunks. +It caches data up to @command{cache-size} bytes. The cache is maintained as +a prioritized least-recently-used (@acronym{LRU}) list, with priorities determined +by user-specified patterns to match filenames. + +When the IO cache translator detects a write operation, the +cache for that file is flushed. + +The IO cache translator periodically verifies the consistency of +cached data, using the modification times on the files. The verification timeout +is configurable. + +@cartouche +@table @code +@item page-size <n> (128KB) +Size of a page. +@item cache-size <n> (32MB) +Total amount of data to be cached. +@item force-revalidate-timeout <n> (1) +Timeout to force a cache consistency verification, in seconds. +@item priority <pattern> (*:0) +Filename patterns listed in order of priority. 
+@end table +@end cartouche + +@node Booster +@subsection Booster +@cindex booster +@example + type performance/booster +@end example + +The booster translator gives applications a faster path to communicate +read and write requests to GlusterFS. Normally, all requests to GlusterFS from +applications go through FUSE, as indicated in @ref{Filesystems in Userspace}. +Using the booster translator in conjunction with the GlusterFS booster shared +library, an application can bypass the FUSE path and send read/write requests +directly to the GlusterFS client process. + +The booster mechanism consists of two parts: the booster translator, +and the booster shared library. The booster translator is meant to be +loaded on the client side, usually at the root of the translator tree. +The booster shared library should be @command{LD_PRELOAD}ed with the +application. + +The booster translator when loaded opens a Unix domain socket and +listens for read/write requests on it. The booster shared library +intercepts read and write system calls and sends the requests to the +GlusterFS process directly using the Unix domain socket, bypassing FUSE. +This leads to superior performance. + +Once you've loaded the booster translator in your volume specification file, you +can start your application as: + +@example + $ LD_PRELOAD=/usr/local/bin/glusterfs-booster.so your_app +@end example + +The booster translator accepts no options. + +@node Features Translators +@section Features Translators + +@menu +* POSIX Locks:: +* Fixed ID:: +@end menu + +@node POSIX Locks +@subsection POSIX Locks +@cindex record locking +@cindex fcntl +@cindex posix-locks (translator) +@example +type features/posix-locks +@end example + +This translator provides storage independent POSIX record locking +support (@command{fcntl} locking). Typically you'll want to load this on the +server side, just above the @acronym{POSIX} storage translator. 
Using this +translator you can get both advisory locking and mandatory locking +support. It also handles @command{flock()} locks properly. + +Caveat: Consider a file that does not have its mandatory locking bits +(+setgid, -group execution) turned on. Assume that this file is now +opened by a process on a client that has the write-behind xlator +loaded. The write-behind xlator does not cache anything for files +which have mandatory locking enabled, to avoid incoherence. Let's say +that mandatory locking is now enabled on this file through another +client. The former client will not know about this change, and +write-behind may erroneously report a write as being successful when +in fact it would fail due to the region it is writing to being locked. + +There seems to be no easy way to fix this. To work around this +problem, it is recommended that you never enable the mandatory bits on +a file while it is open. + +@cartouche +@table @code +@item mandatory [on|off] (on) +Turns mandatory locking on. +@end table +@end cartouche + +@node Fixed ID +@subsection Fixed ID +@cindex fixed-id (translator) +@example +type features/fixed-id +@end example + +The fixed ID translator makes all filesystem requests from the client +to appear to be coming from a fixed, specified +@acronym{UID}/@acronym{GID}, regardless of which user actually +initiated the request. + +@cartouche +@table @code +@item fixed-uid <n> [if not set, not used] +The @acronym{UID} to send to the server +@item fixed-gid <n> [if not set, not used] +The @acronym{GID} to send to the server +@end table +@end cartouche + +@node Miscellaneous Translators +@section Miscellaneous Translators + +@menu +* ROT-13:: +* Trace:: +@end menu + +@node ROT-13 +@subsection ROT-13 +@cindex rot-13 (translator) +@example +type encryption/rot-13 +@end example + +@acronym{ROT-13} is a toy translator that can ``encrypt'' and ``decrypt'' file +contents using the @acronym{ROT-13} algorithm. 
@acronym{ROT-13} is a trivial +algorithm that rotates each letter of the alphabet by thirteen places. Thus, 'A' becomes 'N', +'B' becomes 'O', and 'Z' becomes 'M'. + +It goes without saying that you shouldn't use this translator if you need +@emph{real} encryption (a future release of GlusterFS will have real encryption +translators). + +@cartouche +@table @code +@item encrypt-write [on|off] (on) +Whether to encrypt on write +@item decrypt-read [on|off] (on) +Whether to decrypt on read +@end table +@end cartouche + +@node Trace +@subsection Trace +@cindex trace (translator) +@example +type debug/trace +@end example + +The trace translator is intended for debugging purposes. When loaded, it +logs all the system calls received by the server or client (wherever +trace is loaded), their arguments, and the results. You must use a GlusterFS log +level of DEBUG (See @ref{Running GlusterFS}) for trace to work. + +Sample trace output (lines have been wrapped for readability): +@cartouche +@example +2007-10-30 00:08:58 D [trace.c:1579:trace_opendir] trace: callid: 68 +(*this=0x8059e40, loc=0x8091984 @{path=/iozone3_283, inode=0x8091f00@}, + fd=0x8091d50) + +2007-10-30 00:08:58 D [trace.c:630:trace_opendir_cbk] trace: +(*this=0x8059e40, op_ret=4, op_errno=1, fd=0x8091d50) + +2007-10-30 00:08:58 D [trace.c:1602:trace_readdir] trace: callid: 69 +(*this=0x8059e40, size=4096, offset=0 fd=0x8091d50) + +2007-10-30 00:08:58 D [trace.c:215:trace_readdir_cbk] trace: +(*this=0x8059e40, op_ret=0, op_errno=0, count=4) + +2007-10-30 00:08:58 D [trace.c:1624:trace_closedir] trace: callid: 71 +(*this=0x8059e40, *fd=0x8091d50) + +2007-10-30 00:08:58 D [trace.c:809:trace_closedir_cbk] trace: +(*this=0x8059e40, op_ret=0, op_errno=1) +@end example +@end cartouche + +@node Usage Scenarios +@chapter Usage Scenarios + +@section Advanced Striping + +This section is based on the Advanced Striping tutorial written by +Anand Avati on the GlusterFS wiki 
+@footnote{http://gluster.org/docs/index.php/Mixing_Striped_and_Regular_Files}. + +@subsection Mixed Storage Requirements + +There are two ways of scheduling the I/O: one at the file level (using +the unify translator) and the other at the block level (using the stripe +translator). Striped I/O is good for files that are potentially large +and require high parallel throughput (for example, a single file of +400GB being accessed by 100s and 1000s of systems simultaneously and +randomly). For most of the cases, file level scheduling works best. + +In the real world, it is desirable to mix file level and block level +scheduling on a single storage volume. Alternatively users can choose +to have two separate volumes and hence two mount points, but the +applications may demand a single storage system to host both. + +This document explains how to mix file level scheduling with stripe. + +@subsection Configuration Brief + +This setup demonstrates how users can configure the unify translator with +an appropriate I/O scheduler for file level scheduling and stripe only +files matching given patterns. This way, GlusterFS chooses the appropriate I/O profile +and knows how to efficiently handle both types of data. + +A simple technique to achieve this effect is to create a stripe set of +unify and stripe blocks, where unify is the first sub-volume. Files +that do not match the stripe policy are passed on to the first unify +sub-volume and in turn scheduled across the cluster using its file +level I/O scheduler. + +@image{advanced-stripe,44pc,,,.pdf} + +@subsection Preparing GlusterFS Environment + +Create the directories /export/for-namespace, /export/for-unify and +/export/for-stripe on all the storage bricks. + + Place the following server and client volume spec files under +/etc/glusterfs (or appropriate installed path) and replace the IP +addresses / access control fields to match your environment. 
+ +@cartouche +@example + ## file: /etc/glusterfs/glusterfsd.vol + volume posix-unify + type storage/posix + option directory /export/for-unify + end-volume + + volume posix-stripe + type storage/posix + option directory /export/for-stripe + end-volume + + volume posix-namespace + type storage/posix + option directory /export/for-namespace + end-volume + + volume server + type protocol/server + option transport-type tcp + option auth.addr.posix-unify.allow 192.168.1.* + option auth.addr.posix-stripe.allow 192.168.1.* + option auth.addr.posix-namespace.allow 192.168.1.* + subvolumes posix-unify posix-stripe posix-namespace + end-volume +@end example +@end cartouche + +@cartouche +@example + ## file: /etc/glusterfs/glusterfs.vol + volume client-namespace + type protocol/client + option transport-type tcp + option remote-host 192.168.1.1 + option remote-subvolume posix-namespace + end-volume + + volume client-unify-1 + type protocol/client + option transport-type tcp + option remote-host 192.168.1.1 + option remote-subvolume posix-unify + end-volume + + volume client-unify-2 + type protocol/client + option transport-type tcp + option remote-host 192.168.1.2 + option remote-subvolume posix-unify + end-volume + + volume client-unify-3 + type protocol/client + option transport-type tcp + option remote-host 192.168.1.3 + option remote-subvolume posix-unify + end-volume + + volume client-unify-4 + type protocol/client + option transport-type tcp + option remote-host 192.168.1.4 + option remote-subvolume posix-unify + end-volume + + volume client-stripe-1 + type protocol/client + option transport-type tcp + option remote-host 192.168.1.1 + option remote-subvolume posix-stripe + end-volume + + volume client-stripe-2 + type protocol/client + option transport-type tcp + option remote-host 192.168.1.2 + option remote-subvolume posix-stripe + end-volume + + volume client-stripe-3 + type protocol/client + option transport-type tcp + option remote-host 192.168.1.3 + option 
remote-subvolume posix-stripe + end-volume + + volume client-stripe-4 + type protocol/client + option transport-type tcp + option remote-host 192.168.1.4 + option remote-subvolume posix-stripe + end-volume + + volume unify + type cluster/unify + option scheduler rr + option namespace client-namespace + subvolumes client-unify-1 client-unify-2 client-unify-3 client-unify-4 + end-volume + + volume stripe + type cluster/stripe + option block-size *.img:2MB # All files ending with .img are striped with 2MB stripe block size. + subvolumes unify client-stripe-1 client-stripe-2 client-stripe-3 client-stripe-4 + end-volume +@end example +@end cartouche + + +Bring up the Storage + +Starting GlusterFS Server: If you have installed through binary +package, you can start the service through init.d startup script. If +not: + +@example +[root@@server]# glusterfsd +@end example + +Mounting GlusterFS Volumes: + +@example +[root@@client]# glusterfs -s [BRICK-IP-ADDRESS] /mnt/cluster +@end example + +Improving upon this Setup + +Infiniband Verbs RDMA transport is much faster than TCP/IP GigE +transport. + +Use of performance translators such as read-ahead, write-behind, +io-cache, io-threads, booster is recommended. + +Replace round-robin (rr) scheduler with ALU to handle more dynamic +storage environments. + +@node Troubleshooting +@chapter Troubleshooting + +This chapter is a general troubleshooting guide to GlusterFS. It lists +common GlusterFS server and client error messages, debugging hints, and +concludes with the suggested procedure to report bugs in GlusterFS. + +@section GlusterFS error messages + +@subsection Server errors + +@example +glusterfsd: FATAL: could not open specfile: +'/etc/glusterfs/glusterfsd.vol' +@end example + +The GlusterFS server expects the volume specification file to be +at @command{/etc/glusterfs/glusterfsd.vol}. The example +specification file will be installed as +@command{/etc/glusterfs/glusterfsd.vol.sample}. 
You need to edit +it and rename it, or provide a different specification file using +the @command{--spec-file} command line option (See @ref{Server}). + +@vskip 4ex + +@example +gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfsd.log" + (Permission denied) +@end example + +You don't have permission to create files in the +@command{/usr/var/log/glusterfs} directory. Make sure you are running +GlusterFS as root. Alternatively, specify a different path for the log +file using the @command{--log-file} option (See @ref{Server}). + +@subsection Client errors + +@example +fusermount: failed to access mountpoint /mnt: + Transport endpoint is not connected +@end example + +A previous failed (or hung) mount of GlusterFS is preventing it from being +mounted again in the same location. The fix is to do: + +@example +# umount /mnt +@end example + +and try mounting again. + +@vskip 4ex + +@strong{``Transport endpoint is not connected''.} + +If you get this error when you try a command such as @command{ls} or @command{cat}, +it means the GlusterFS mount did not succeed. Try running GlusterFS in @command{DEBUG} +logging level and study the log messages to discover the cause. + +@vskip 4ex + +@strong{``Connect to server failed'', ``SERVER-ADDRESS: Connection refused''.} + +The GlusterFS server is not running or has died. Check your network +connections and firewall settings. To check if the server is reachable, +try: + +@example +telnet IP-ADDRESS 24007 +@end example + +If the server is accessible, your `telnet' command should connect and +block. If not you will see an error message such as @command{telnet: Unable to +connect to remote host: Connection refused}. 24007 is the default +GlusterFS port. If you have changed it, then use the corresponding +port instead. 
+ +@vskip 4ex + +@example +gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfs.log" + (Permission denied) +@end example + +You don't have permission to create files in the +@command{/usr/var/log/glusterfs} directory. Make sure you are running +GlusterFS as root. Alternatively, specify a different path for the log +file using the @command{--log-file} option (See @ref{Client}). + +@section FUSE error messages +@command{modprobe fuse} fails with: ``Unknown symbol in module, or unknown parameter''. +@cindex Red Hat Enterprise Linux + +If you are using fuse-2.6.x on Red Hat Enterprise Linux Workstation 4 +and Advanced Server 4 with 2.6.9-42.ELlargesmp, 2.6.9-42.ELsmp, +2.6.9-42.EL kernels and get this error while loading the @acronym{FUSE} kernel +module, you need to apply the following patch. + +For fuse-2.6.2: + +@indicateurl{http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.2-rhel-build.patch} + +For fuse-2.6.3: + +@indicateurl{http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.3-rhel-build.patch} + +@section AppArmor and GlusterFS +@cindex AppArmor +@cindex OpenSuSE +Under OpenSuSE GNU/Linux, the AppArmor security feature does not +allow GlusterFS to create temporary files or network socket +connections even while running as root. You will see error messages +like `Unable to open log file: Operation not permitted' or `Connection +refused'. Disabling AppArmor using YaST or properly configuring +AppArmor to recognize @command{glusterfsd} or @command{glusterfs}/@command{fusermount} +should solve the problem. + +@section Reporting a bug + +If you encounter a bug in GlusterFS, please follow the guidelines +below when you report it to the mailing list. Be sure to report +it! User feedback is crucial to the health of the project and we value +it highly. 
+@subsection General instructions + +When running GlusterFS in a non-production environment, be sure to +build it with the following command: + +@example + $ make CFLAGS='-g -O0 -DDEBUG' +@end example + +This includes debugging information, which will be helpful in getting +backtraces (see below), and also disables optimization. Enabling +optimization can result in incorrect line numbers being reported to +gdb. + +@subsection Volume specification files + +Attach all relevant server and client spec files you were using when +you encountered the bug. Also tell us details of your setup, i.e., how +many clients and how many servers. + +@subsection Log files + +Set the loglevel of your client and server programs to @acronym{DEBUG} (by +passing the -L @acronym{DEBUG} option) and attach the log files to your bug +report. Obviously, if only the client is failing (for example), you +only need to send us the client log file. + +@subsection Backtrace + +If GlusterFS has encountered a segmentation fault or has crashed for +some other reason, include the backtrace with the bug report. You can +get the backtrace using the following procedure. + +Run the GlusterFS client or server inside gdb. + +@example + $ gdb ./glusterfs + (gdb) set args -f client.spec -N -l/path/to/log/file -LDEBUG /mnt/point + (gdb) run +@end example + +Now when the process segfaults, you can get the backtrace by typing: + +@example + (gdb) bt +@end example + +If the GlusterFS process has crashed and dumped a core file (you can +find this in / if running as a daemon and in the current directory +otherwise), you can do: + +@example + $ gdb /path/to/glusterfs /path/to/core.<pid> +@end example + +and then get the backtrace. + +If the GlusterFS server or client seems to be hung, then you can get +the backtrace by attaching gdb to the process. 
First get the @command{PID} of +the process (using ps), and then do: + +@example + $ gdb ./glusterfs <pid> +@end example + +Press Ctrl-C to interrupt the process and then generate the backtrace. + +@subsection Reproducing the bug + +If the bug is reproducible, please include the steps necessary to do +so. If the bug is not reproducible, send us the bug report anyway. + +@subsection Other information + +If you think it is relevant, also send us the version of @acronym{FUSE} you're +using, the kernel version, and the platform. + +@node GNU Free Documentation Licence +@appendix GNU Free Documentation Licence +@include fdl.texi + +@node Index +@unnumbered Index +@printindex cp + +@bye diff --git a/doc/legacy/xlator.odg b/doc/legacy/xlator.odg Binary files differnew file mode 100644 index 00000000000..179a65f6e26 --- /dev/null +++ b/doc/legacy/xlator.odg diff --git a/doc/legacy/xlator.pdf b/doc/legacy/xlator.pdf Binary files differnew file mode 100644 index 00000000000..a07e14d67d2 --- /dev/null +++ b/doc/legacy/xlator.pdf |