From 875740c702d6b30754d46b17d37dd0865743216a Mon Sep 17 00:00:00 2001 From: Shehjar Tikoo Date: Sat, 3 Sep 2011 14:51:45 +0530 Subject: nfs3: Resolve entry vs. hash conflict at same dir depth Intro Note ========== The current code in hard fh resolution takes the first-match approach, i.e. which ever dirent either matches the hash or matches the gfid first is the one chosen as the result for the next step of fh resolution. In the latter case, i.e., dirent matches the gfid, we the next step is to conclude the fh resolution by returning the entry whose gfid matched. In the former, i.e., the hash matches the dirent, we choose the hash-matching dirent as the next directory to descend into, for searching the file to be operated upon. Problem ======= When performing hard fh resolution, there can be a situation where: o the hash of the primary entry,i.e. the entry we're looking for and the hash of another sibling directory, match. Note the use of "sibling", meaning both the primary entry and the hash matching one are in the same directory, i.e., their filehandle.hashcount will be same. o the sibling directory is encountered first during the dir search. Because of the current code described in "Intro", we'll end up descending into the sibling directory even though the correct behaviour is to ignore this and wait till we encounter the primary entry in the same parent directory. Once we end up descending into this sibling directory, the directory depth validation check fails. The check fails because it notices that the resolution is attempting to open a directory that is deeper in the fs tree than the file we're looking for. When this check fails, we return an ESTALE. So basically, a false-positive results in an estale to Specsfs. This is not a theoretical situation. Me and Avati saw this on specsfs test where sfs created terabytes-sized file system for its tests. The number of files was so huge in a single directory that the hashes of two entries ended up colliding. Change-Id: I78d8756252330201e165d788a29277b4bce0df90 BUG: 3510 Reviewed-on: http://review.gluster.com/358 Tested-by: Gluster Build System Reviewed-by: Shehjar Tikoo --- xlators/nfs/server/src/nfs3-helpers.c | 58 ++++++++++++++++++++++++++--------- xlators/nfs/server/src/nfs3.h | 3 ++ 2 files changed, 46 insertions(+), 15 deletions(-) (limited to 'xlators/nfs/server') diff --git a/xlators/nfs/server/src/nfs3-helpers.c b/xlators/nfs/server/src/nfs3-helpers.c index aef3bce94..5cdcbecaf 100644 --- a/xlators/nfs/server/src/nfs3-helpers.c +++ b/xlators/nfs/server/src/nfs3-helpers.c @@ -2798,32 +2798,54 @@ out: int -nfs3_fh_resolve_check_response (nfs3_call_state_t *cs, gf_dirent_t *candidate, - int response, off_t last_offt) +nfs3_fh_resolve_determine_response (nfs3_call_state_t *cs) { + + int response = GF_NFS3_FHRESOLVE_NOTFOUND; + + if (!cs) + return response; + + if ((cs->hashmatch) && (cs->entrymatch)) + response = GF_NFS3_FHRESOLVE_FOUND; + else if ((cs->hashmatch) && (!cs->entrymatch)) + response = GF_NFS3_FHRESOLVE_DIRFOUND; + else if ((!cs->hashmatch) && (cs->entrymatch)) + response = GF_NFS3_FHRESOLVE_FOUND; + else if ((!cs->hashmatch) && (!cs->entrymatch)) + response = GF_NFS3_FHRESOLVE_NOTFOUND; + + return response; +} + + +int +nfs3_fh_resolve_check_response (nfs3_call_state_t *cs) { int ret = -EFAULT; nfs_user_t nfu = {0, }; + int response = GF_NFS3_FHRESOLVE_NOTFOUND; if (!cs) return ret; + response = nfs3_fh_resolve_determine_response (cs); switch (response) { case GF_NFS3_FHRESOLVE_DIRFOUND: nfs3_fh_resolve_close_cwd (cs); nfs3_fh_resolve_dir_hard (cs, cs->resolvedloc.inode->gfid, - candidate->d_name); + cs->hashmatch->d_name); break; case GF_NFS3_FHRESOLVE_FOUND: nfs3_fh_resolve_close_cwd (cs); - nfs3_fh_resolve_found (cs, candidate); + nfs3_fh_resolve_found (cs, cs->entrymatch); break; case GF_NFS3_FHRESOLVE_NOTFOUND: nfs_user_root_create (&nfu); nfs_readdirp (cs->nfsx, cs->vol, &nfu, cs->resolve_dir_fd, - GF_NFS3_DTPREF, last_offt, + GF_NFS3_DTPREF, cs->lastentryoffset, nfs3_fh_resolve_readdir_cbk, cs); break; } @@ -2836,27 +2858,30 @@ nfs3_fh_resolve_search_dir (nfs3_call_state_t *cs, gf_dirent_t *entries) { gf_dirent_t *candidate = NULL; int ret = GF_NFS3_FHRESOLVE_NOTFOUND; - off_t lastoff = 0; if ((!cs) || (!entries)) return -EFAULT; if (list_empty (&entries->list)) - goto not_found; + goto search_done; list_for_each_entry (candidate, &entries->list, list) { - lastoff = candidate->d_off; + cs->lastentryoffset = candidate->d_off; gf_log (GF_NFS3, GF_LOG_TRACE, "Candidate: %s, gfid: %s", candidate->d_name, uuid_utoa (candidate->d_stat.ia_gfid)); ret = nfs3_fh_resolve_check_entry (&cs->resolvefh, candidate, cs->hashidx); - if (ret != GF_NFS3_FHRESOLVE_NOTFOUND) - break; + if (ret == GF_NFS3_FHRESOLVE_FOUND) + cs->entrymatch = gf_dirent_for_name (candidate->d_name); + else if (ret == GF_NFS3_FHRESOLVE_DIRFOUND) { + if (cs->hashmatch) + gf_dirent_free (cs->hashmatch); + cs->hashmatch = gf_dirent_for_name (candidate->d_name); + } } -not_found: - nfs3_fh_resolve_check_response (cs, candidate, ret, lastoff); +search_done: return ret; } @@ -2867,18 +2892,21 @@ nfs3_fh_resolve_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, gf_dirent_t *entries) { nfs3_call_state_t *cs = NULL; + nfs_user_t nfu = {0, }; cs = frame->local; if (op_ret <= 0) { gf_log (GF_NFS3, GF_LOG_TRACE, "Directory read done: %s: %s", cs->resolvedloc.path, strerror (op_ret)); - cs->resolve_ret = -1; - cs->resolve_errno = ESTALE; - nfs3_call_resume (cs); + nfs3_fh_resolve_check_response (cs); goto err; } nfs3_fh_resolve_search_dir (cs, entries); + nfs_user_root_create (&nfu); + nfs_readdirp (cs->nfsx, cs->vol, &nfu, cs->resolve_dir_fd, + GF_NFS3_DTPREF, cs->lastentryoffset, + nfs3_fh_resolve_readdir_cbk, cs); err: return 0; diff --git a/xlators/nfs/server/src/nfs3.h b/xlators/nfs/server/src/nfs3.h index cefb7bd4d..d6f20ebd5 100644 --- a/xlators/nfs/server/src/nfs3.h +++ b/xlators/nfs/server/src/nfs3.h @@ -203,6 +203,9 @@ struct nfs3_local { fd_t *resolve_dir_fd; char *resolventry; nfs3_lookup_type_t lookuptype; + gf_dirent_t *hashmatch; + gf_dirent_t *entrymatch; + off_t lastentryoffset; }; #define nfs3_is_revalidate_lookup(cst) ((cst)->lookuptype == GF_NFS3_REVALIDATE) -- cgit