diff options
author | Kotresh HR <khiremat@redhat.com> | 2018-11-17 13:14:24 +0530 |
---|---|---|
committer | Amar Tumballi <amarts@redhat.com> | 2018-12-04 09:15:44 +0000 |
commit | 54ebd47e4154c8b37680b2bac43775fc92ced153 (patch) | |
tree | d7ea19c495c2798d76f12b714b2e06eebe6a021b | |
parent | 748e993d1f30197c533933ddae889b317ccd00d3 (diff) |
geo-rep: Fix syncing of files with non-ascii filenames
Problem:
Creation of files/directories with non-ascii names fails
to sync to the slave. It crashes with below traceback on
slave.
...
File "/usr/lib/x86_64-linux-gnu/glusterfs/python/syncdaemon/repce.py", line 118, in worker
res = getattr(self.obj, rmeth)(*in_data[2:])
File "/usr/lib/x86_64-linux-gnu/glusterfs/python/syncdaemon/resource.py", line 709, in entry_ops
[ESTALE, EINVAL, EBUSY])
File "/usr/lib/x86_64-linux-gnu/glusterfs/python/syncdaemon/syncdutils.py", line 546, in errno_wrap
return call(*arg)
File "/usr/lib/x86_64-linux-gnu/glusterfs/python/syncdaemon/libcxattr.py", line 83, in lsetxattr
cls.raise_oserr()
File "/usr/lib/x86_64-linux-gnu/glusterfs/python/syncdaemon/libcxattr.py", line 38, in raise_oserr
raise OSError(errn, os.strerror(errn))
OSError: [Errno 12] Cannot allocate memory
Cause:
The length calculation arguments passed to blob creation was done before encoding. Hence
was failing in gfid-access layer.
Fix:
It appears that the calculating lenght properly fixes this issue. But it will cause
issues in other places in 'python2' and not in 'python3'. So encoding and decoding
each required string to make geo-rep compatible with both 'python2' and 'python3'
is a nightmare and is not fool proof. Hence kept 'python2' code as is with out
encode/decode and applied encode/decode only to 'python3'
Added non-ascii filename tests to regression
fixes: bz#1650893
Change-Id: I35cfaf848e07b1a0b5cb93c01b98b472f08271a6
Signed-off-by: Kotresh HR <khiremat@redhat.com>
-rw-r--r-- | geo-replication/syncdaemon/libcxattr.py | 16 | ||||
-rw-r--r-- | geo-replication/syncdaemon/libgfchangelog.py | 30 | ||||
-rw-r--r-- | geo-replication/syncdaemon/py2py3.py | 141 | ||||
-rw-r--r-- | geo-replication/syncdaemon/resource.py | 55 | ||||
-rw-r--r-- | tests/geo-rep.rc | 21 |
5 files changed, 194 insertions, 69 deletions
diff --git a/geo-replication/syncdaemon/libcxattr.py b/geo-replication/syncdaemon/libcxattr.py index 7f3f6ce453a..c7d69d7eb2e 100644 --- a/geo-replication/syncdaemon/libcxattr.py +++ b/geo-replication/syncdaemon/libcxattr.py @@ -9,9 +9,9 @@ # import os -from ctypes import CDLL, create_string_buffer, get_errno -import py2py3 -from py2py3 import bytearray_to_str +from ctypes import CDLL, get_errno +from py2py3 import bytearray_to_str, gr_create_string_buffer +from py2py3 import gr_query_xattr, gr_lsetxattr, gr_lremovexattr class Xattr(object): @@ -40,7 +40,7 @@ class Xattr(object): @classmethod def _query_xattr(cls, path, siz, syscall, *a): if siz: - buf = create_string_buffer(b'\0' * siz) + buf = gr_create_string_buffer(siz) else: buf = None ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz))) @@ -56,7 +56,7 @@ class Xattr(object): @classmethod def lgetxattr(cls, path, attr, siz=0): - return cls._query_xattr(path.encode(), siz, 'lgetxattr', attr.encode()) + return gr_query_xattr(cls, path, siz, 'lgetxattr', attr) @classmethod def lgetxattr_buf(cls, path, attr): @@ -70,7 +70,7 @@ class Xattr(object): @classmethod def llistxattr(cls, path, siz=0): - ret = cls._query_xattr(path.encode(), siz, 'llistxattr') + ret = gr_query_xattr(cls, path, siz, 'llistxattr') if isinstance(ret, str): ret = ret.strip('\0') ret = ret.split('\0') if ret else [] @@ -78,13 +78,13 @@ class Xattr(object): @classmethod def lsetxattr(cls, path, attr, val): - ret = cls.libc.lsetxattr(path.encode(), attr.encode(), val, len(val), 0) + ret = gr_lsetxattr(cls, path, attr, val) if ret == -1: cls.raise_oserr() @classmethod def lremovexattr(cls, path, attr): - ret = cls.libc.lremovexattr(path.encode(), attr.encode()) + ret = gr_lremovexattr(cls, path, attr) if ret == -1: cls.raise_oserr() diff --git a/geo-replication/syncdaemon/libgfchangelog.py b/geo-replication/syncdaemon/libgfchangelog.py index cc40fd5475d..fff9d24e54f 100644 --- a/geo-replication/syncdaemon/libgfchangelog.py +++ b/geo-replication/syncdaemon/libgfchangelog.py @@ -9,9 +9,10 @@ # import os -from ctypes import CDLL, RTLD_GLOBAL, create_string_buffer, \ - get_errno, byref, c_ulong +from ctypes import CDLL, RTLD_GLOBAL, get_errno, byref, c_ulong from syncdutils import ChangelogException, ChangelogHistoryNotAvailable +from py2py3 import gr_cl_history_changelog, gr_cl_done, gr_create_string_buffer +from py2py3 import gr_cl_register, gr_cl_history_done, bytearray_to_str class Changes(object): @@ -39,9 +40,7 @@ class Changes(object): @classmethod def cl_register(cls, brick, path, log_file, log_level, retries=0): - ret = cls._get_api('gf_changelog_register')(brick.encode(), path.encode(), - log_file.encode(), - log_level, retries) + ret = gr_cl_register(cls, brick, path, log_file, log_level, retries) if ret == -1: cls.raise_changelog_err() @@ -63,14 +62,16 @@ class Changes(object): def clsort(f): return f.split('.')[-1] changes = [] - buf = create_string_buffer(b'\0' * 4096) + buf = gr_create_string_buffer(4096) call = cls._get_api('gf_changelog_next_change') while True: ret = call(buf, 4096) if ret in (0, -1): break - changes.append(buf.raw[:ret - 1].decode()) + # py2 and py3 compatibility + result = bytearray_to_str(buf.raw[:ret - 1]) + changes.append(result) if ret == -1: cls.raise_changelog_err() # cleanup tracker @@ -79,7 +80,7 @@ class Changes(object): @classmethod def cl_done(cls, clfile): - ret = cls._get_api('gf_changelog_done')(clfile.encode()) + ret = gr_cl_done(cls, clfile) if ret == -1: cls.raise_changelog_err() @@ -94,9 +95,8 @@ class Changes(object): @classmethod def cl_history_changelog(cls, changelog_path, start, end, num_parallel): actual_end = c_ulong() - ret = cls._get_api('gf_history_changelog')(changelog_path.encode(), start, end, - num_parallel, - byref(actual_end)) + ret = gr_cl_history_changelog(cls, changelog_path, start, end, + num_parallel, byref(actual_end)) if ret == -1: cls.raise_changelog_err() @@ -118,14 +118,16 @@ class Changes(object): return f.split('.')[-1] changes = [] - buf = create_string_buffer(b'\0' * 4096) + buf = gr_create_string_buffer(4096) call = cls._get_api('gf_history_changelog_next_change') while True: ret = call(buf, 4096) if ret in (0, -1): break - changes.append(buf.raw[:ret - 1].decode()) + # py2 and py3 compatibility + result = bytearray_to_str(buf.raw[:ret - 1]) + changes.append(result) if ret == -1: cls.raise_changelog_err() @@ -133,6 +135,6 @@ class Changes(object): @classmethod def cl_history_done(cls, clfile): - ret = cls._get_api('gf_history_changelog_done')(clfile.encode()) + ret = gr_cl_history_done(cls, clfile) if ret == -1: cls.raise_changelog_err() diff --git a/geo-replication/syncdaemon/py2py3.py b/geo-replication/syncdaemon/py2py3.py index 4c0e1152aa8..faad750059c 100644 --- a/geo-replication/syncdaemon/py2py3.py +++ b/geo-replication/syncdaemon/py2py3.py @@ -12,7 +12,10 @@ import sys import os - +import stat +import struct +from syncdutils import umask +from ctypes import create_string_buffer if sys.version_info >= (3,): def pipe(): @@ -35,6 +38,77 @@ if sys.version_info >= (3,): def str_to_bytearray(string): return bytes([ord(c) for c in string]) + def gr_create_string_buffer(size): + return create_string_buffer(b'\0', size) + + def gr_query_xattr(cls, path, size, syscall, attr=None): + if attr: + return cls._query_xattr(path.encode(), size, syscall, + attr.encode()) + else: + return cls._query_xattr(path.encode(), size, syscall) + + def gr_lsetxattr(cls, path, attr, val): + return cls.libc.lsetxattr(path.encode(), attr.encode(), val, + len(val), 0) + + def gr_lremovexattr(cls, path, attr): + return cls.libc.lremovexattr(path.encode(), attr.encode()) + + def gr_cl_register(cls, brick, path, log_file, log_level, retries): + return cls._get_api('gf_changelog_register')(brick.encode(), + path.encode(), + log_file.encode(), + log_level, retries) + + def gr_cl_done(cls, clfile): + return cls._get_api('gf_changelog_done')(clfile.encode()) + + def gr_cl_history_changelog(cls, changelog_path, start, end, num_parallel, + actual_end): + return cls._get_api('gf_history_changelog')(changelog_path.encode(), + start, end, num_parallel, + actual_end) + + def gr_cl_history_done(cls, clfile): + return cls._get_api('gf_history_changelog_done')(clfile.encode()) + + # regular file + + def entry_pack_reg(cls, gf, bn, mo, uid, gid): + bn_encoded = bn.encode() + blen = len(bn_encoded) + return struct.pack(cls._fmt_mknod(blen), + uid, gid, gf.encode(), mo, bn_encoded, + stat.S_IMODE(mo), 0, umask()) + + def entry_pack_reg_stat(cls, gf, bn, st): + bn_encoded = bn.encode() + blen = len(bn_encoded) + mo = st['mode'] + return struct.pack(cls._fmt_mknod(blen), + st['uid'], st['gid'], + gf.encode(), mo, bn_encoded, + stat.S_IMODE(mo), 0, umask()) + # mkdir + + def entry_pack_mkdir(cls, gf, bn, mo, uid, gid): + bn_encoded = bn.encode() + blen = len(bn_encoded) + return struct.pack(cls._fmt_mkdir(blen), + uid, gid, gf.encode(), mo, bn_encoded, + stat.S_IMODE(mo), umask()) + # symlink + + def entry_pack_symlink(cls, gf, bn, lnk, st): + bn_encoded = bn.encode() + blen = len(bn_encoded) + lnk_encoded = lnk.encode() + llen = len(lnk_encoded) + return struct.pack(cls._fmt_symlink(blen, llen), + st['uid'], st['gid'], + gf.encode(), st['mode'], bn_encoded, + lnk_encoded) else: def pipe(): (r, w) = os.pipe() @@ -42,8 +116,69 @@ else: # Raw conversion of bytearray to string def bytearray_to_str(byte_arr): - return ''.join([b for b in byte_arr]) + return byte_arr # Raw conversion of string to bytearray def str_to_bytearray(string): - return b"".join([c for c in string]) + return string + + def gr_create_string_buffer(size): + return create_string_buffer('\0', size) + + def gr_query_xattr(cls, path, size, syscall, attr=None): + if attr: + return cls._query_xattr(path, size, syscall, attr) + else: + return cls._query_xattr(path, size, syscall) + + def gr_lsetxattr(cls, path, attr, val): + return cls.libc.lsetxattr(path, attr, val, len(val), 0) + + def gr_lremovexattr(cls, path, attr): + return cls.libc.lremovexattr(path, attr) + + def gr_cl_register(cls, brick, path, log_file, log_level, retries): + return cls._get_api('gf_changelog_register')(brick, path, log_file, + log_level, retries) + + def gr_cl_done(cls, clfile): + return cls._get_api('gf_changelog_done')(clfile) + + def gr_cl_history_changelog(cls, changelog_path, start, end, num_parallel, + actual_end): + return cls._get_api('gf_history_changelog')(changelog_path, start, end, + num_parallel, actual_end) + + def gr_cl_history_done(cls, clfile): + return cls._get_api('gf_history_changelog_done')(clfile) + + # regular file + + def entry_pack_reg(cls, gf, bn, mo, uid, gid): + blen = len(bn) + return struct.pack(cls._fmt_mknod(blen), + uid, gid, gf, mo, bn, + stat.S_IMODE(mo), 0, umask()) + + def entry_pack_reg_stat(cls, gf, bn, st): + blen = len(bn) + mo = st['mode'] + return struct.pack(cls._fmt_mknod(blen), + st['uid'], st['gid'], + gf, mo, bn, + stat.S_IMODE(mo), 0, umask()) + # mkdir + + def entry_pack_mkdir(cls, gf, bn, mo, uid, gid): + blen = len(bn) + return struct.pack(cls._fmt_mkdir(blen), + uid, gid, gf, mo, bn, + stat.S_IMODE(mo), umask()) + # symlink + + def entry_pack_symlink(cls, gf, bn, lnk, st): + blen = len(bn) + llen = len(lnk) + return struct.pack(cls._fmt_symlink(blen, llen), + st['uid'], st['gid'], + gf, st['mode'], bn, lnk) diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py index c8575f00e70..c669f7f7756 100644 --- a/geo-replication/syncdaemon/resource.py +++ b/geo-replication/syncdaemon/resource.py @@ -31,7 +31,7 @@ from repce import RepceServer, RepceClient from master import gmaster_builder import syncdutils from syncdutils import GsyncdError, select, privileged, funcode -from syncdutils import umask, entry2pb, gauxpfx, errno_wrap, lstat +from syncdutils import entry2pb, gauxpfx, errno_wrap, lstat from syncdutils import NoStimeAvailable, PartialHistoryAvailable from syncdutils import ChangelogException, ChangelogHistoryNotAvailable from syncdutils import get_changelog_log_level, get_rsync_version @@ -41,8 +41,8 @@ from gsyncdstatus import GeorepStatus from syncdutils import lf, Popen, sup from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt from syncdutils import unshare_propagation_supported, get_slv_dir_path -import py2py3 -from py2py3 import pipe, str_to_bytearray +from py2py3 import pipe, str_to_bytearray, entry_pack_reg +from py2py3 import entry_pack_reg_stat, entry_pack_mkdir, entry_pack_symlink ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP') @@ -377,37 +377,6 @@ class Server(object): def entry_ops(cls, entries): pfx = gauxpfx() logging.debug('entries: %s' % repr(entries)) - # regular file - - def entry_pack_reg(gf, bn, mo, uid, gid): - blen = len(bn) - return struct.pack(cls._fmt_mknod(blen), - uid, gid, gf.encode(), mo, bn.encode(), - stat.S_IMODE(mo), 0, umask()) - - def entry_pack_reg_stat(gf, bn, st): - blen = len(bn) - mo = st['mode'] - return struct.pack(cls._fmt_mknod(blen), - st['uid'], st['gid'], - gf.encode(), mo, bn.encode(), - stat.S_IMODE(mo), 0, umask()) - # mkdir - - def entry_pack_mkdir(gf, bn, mo, uid, gid): - blen = len(bn) - return struct.pack(cls._fmt_mkdir(blen), - uid, gid, gf.encode(), mo, bn.encode(), - stat.S_IMODE(mo), umask()) - # symlink - - def entry_pack_symlink(gf, bn, lnk, st): - blen = len(bn) - llen = len(lnk) - return struct.pack(cls._fmt_symlink(blen, llen), - st['uid'], st['gid'], - gf.encode(), st['mode'], bn.encode(), - lnk.encode()) def entry_purge(op, entry, gfid, e, uid, gid): # This is an extremely racy code and needs to be fixed ASAP. @@ -581,8 +550,8 @@ class Server(object): st = lstat(slink) # don't create multiple entries with same gfid if isinstance(st, int): - blob = entry_pack_reg( - gfid, bname, e['mode'], e['uid'], e['gid']) + blob = entry_pack_reg(cls, gfid, bname, + e['mode'], e['uid'], e['gid']) # Self healed hardlinks are recorded as MKNOD. # So if the gfid already exists, it should be # processed as hard link not mknod. @@ -597,8 +566,8 @@ class Server(object): st = lstat(slink) # don't create multiple entries with same gfid if isinstance(st, int): - blob = entry_pack_mkdir( - gfid, bname, e['mode'], e['uid'], e['gid']) + blob = entry_pack_mkdir(cls, gfid, bname, + e['mode'], e['uid'], e['gid']) elif (isinstance(lstat(en), int) or not matching_disk_gfid(gfid, en)): # If gfid of a directory exists on slave but path based @@ -626,9 +595,9 @@ class Server(object): if isinstance(st, int): (pg, bname) = entry2pb(entry) if stat.S_ISREG(e['stat']['mode']): - blob = entry_pack_reg_stat(gfid, bname, e['stat']) + blob = entry_pack_reg_stat(cls, gfid, bname, e['stat']) elif stat.S_ISLNK(e['stat']['mode']): - blob = entry_pack_symlink(gfid, bname, e['link'], + blob = entry_pack_symlink(cls, gfid, bname, e['link'], e['stat']) else: cmd_ret = errno_wrap(os.link, @@ -639,7 +608,7 @@ class Server(object): en = e['entry'] st = lstat(entry) if isinstance(st, int): - blob = entry_pack_symlink(gfid, bname, e['link'], + blob = entry_pack_symlink(cls, gfid, bname, e['link'], e['stat']) elif not matching_disk_gfid(gfid, en): collect_failure(e, EEXIST, uid, gid) @@ -659,7 +628,7 @@ class Server(object): st1 = lstat(en) if isinstance(st1, int): (pg, bname) = entry2pb(en) - blob = entry_pack_symlink(gfid, bname, + blob = entry_pack_symlink(cls, gfid, bname, e['link'], e['stat']) elif not matching_disk_gfid(gfid, en): collect_failure(e, EEXIST, uid, gid, True) @@ -669,7 +638,7 @@ class Server(object): # don't create multiple entries with same gfid if isinstance(st, int): (pg, bname) = entry2pb(en) - blob = entry_pack_reg_stat(gfid, bname, + blob = entry_pack_reg_stat(cls, gfid, bname, e['stat']) else: cmd_ret = errno_wrap(os.link, [slink, en], diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc index c33ceaaa64e..da22c4befe4 100644 --- a/tests/geo-rep.rc +++ b/tests/geo-rep.rc @@ -40,34 +40,53 @@ function create_data() touch ${master_mnt}/${prefix}_f2 touch ${master_mnt}/${prefix}_f3 + # non-ascii filename test + echo "Hello non-ascii" > ${master_mnt}/${prefix}_f1_ಸಂತಸ + touch ${master_mnt}/${prefix}_f2_ಸಂತಸ + touch ${master_mnt}/${prefix}_f3_ಸಂತಸ + # dir mkdir ${master_mnt}/${prefix}_d1 mkdir ${master_mnt}/${prefix}_d2 mkdir ${master_mnt}/${prefix}_d3 - # Hardlink + # non-ascii dir and filename test + mkdir ${master_mnt}/${prefix}_d1_ನನ್ನ + mkdir ${master_mnt}/${prefix}_d2_ಸಂತಸ + mkdir ${master_mnt}/${prefix}_d3_ಸಂತಸ + echo "Hello non-ascii" > ${master_mnt}/${prefix}_d1_ನನ್ನ/ಸಂತಸ + + # Hardlink + non-ascii name ln ${master_mnt}/${prefix}_f1 ${master_mnt}/${prefix}_hl1 + ln ${master_mnt}/${prefix}_f1 ${master_mnt}/${prefix}_hl1_ಸಂತಸ # Symlink cd ${master_mnt} ln -s ${prefix}_f1 ${prefix}_sl1 + ln -s ${prefix}_f1 ${prefix}_sl1_ಸಂತಸ cd - # UNLINK rm ${master_mnt}/${prefix}_f2 + rm ${master_mnt}/${prefix}_f2_ಸಂತಸ # RMDIR rmdir ${master_mnt}/${prefix}_d2 + rmdir ${master_mnt}/${prefix}_d2_ಸಂತಸ # Rename - File mv ${master_mnt}/${prefix}_f3 ${master_mnt}/${prefix}_f4 + mv ${master_mnt}/${prefix}_f3_ಸಂತಸ ${master_mnt}/${prefix}_f4_ಸಂತಸ # Rename - Dir mv ${master_mnt}/${prefix}_d3 ${master_mnt}/${prefix}_d4 + mv ${master_mnt}/${prefix}_d3_ಸಂತಸ ${master_mnt}/${prefix}_d4_ಸಂತಸ # chown touch ${master_mnt}/${prefix}_chown_f1 chown 1000:1000 ${master_mnt}/${prefix}_chown_f1 + touch ${master_mnt}/${prefix}_chown_f1_ಸಂತಸ + chown 1000:1000 ${master_mnt}/${prefix}_chown_f1_ಸಂತಸ } function chown_file_ok() |