From 8e15e16c9e6a5768f31976cc21b48d5bb10617b7 Mon Sep 17 00:00:00 2001 From: Andrew Deason Date: Fri, 18 Nov 2011 10:25:08 -0600 Subject: [PATCH] DAFS: Atomically re-hash vnode in VGetFreeVnode_r VGetFreeVnode_r pulls a vnode off of the vnode LRU, and removes the vnode from the vnode hash table. In DAFS, we may drop the volume glock immediately afterwards in order to close the ihandle for the old vnode structure. While we have the glock dropped, another thread may try to VLookupVnode for the new vnode we are creating, find that it is not hashed, and call VGetFreeVnode_r itself. This can result in two threads having two separate copies of the same vnode, which bypasses any mutual exclusion ensured by per-vnode locks, since they will lock their own version of the vnode. This can result in a variety of different problems where two threads try to write to the same vnode at the same time. One example is calling CopyOnWrite on the same file in parallel, which can cause link undercounts, writes to the wrong vnode tag, and other CoW-related errors. To prevent all this, make VGetFreeVnode_r atomically remove the old vnode structure from the relevant hashes, and add it to the new hashes before dropping the glock. This ensures that any other thread trying to load the same vnode will see the new vnode in the hash table, though it will not yet be valid until the vnode is loaded. Note that this only solves this race for DAFS. For non-DAFS, the vol glock is held over the ihandle close, so this race does not exist. The comments around the callers of VGetFreeVnode_r indicate that similar extant races exist here for non-DAFS, but they are unsolvable without significant DAFS-like changes to the vnode package. Change-Id: I84c5d1bdd29f9e7140e905388b4b65629932c951 Reviewed-on: http://gerrit.openafs.org/6385 Tested-by: BuildBot Reviewed-by: Derrick Brashear --- src/vol/vnode.c | 45 +++++++++++++++++++++++++-------------------- src/vol/vnode.h | 3 ++- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/vol/vnode.c b/src/vol/vnode.c index 424f0a7..37b697f 100644 --- a/src/vol/vnode.c +++ b/src/vol/vnode.c @@ -427,13 +427,19 @@ VInitVnodes(VnodeClass class, int nVnodes) * allocate an unused vnode from the lru chain. * * @param[in] vcp vnode class info object pointer + * @param[in] vp volume pointer + * @param[in] vnodeNumber new vnode number that the vnode will be used for * * @pre VOL_LOCK is held * - * @post vnode object is removed from lru, and vnode hash table. - * vnode is disassociated from volume object. + * @post vnode object is removed from lru + * vnode is disassociated with its old volume, and associated with its + * new volume + * vnode is removed from its old vnode hash table, and for DAFS, it is + * added to its new hash table * state is set to VN_STATE_INVALID. * inode handle is released. + * a reservation is held on the vnode object * * @note we traverse backwards along the lru circlist. It shouldn't * be necessary to specify that nUsers == 0 since if it is in the list, @@ -442,10 +448,14 @@ VInitVnodes(VnodeClass class, int nVnodes) * * @warning DAFS: VOL_LOCK is dropped while doing inode handle release * + * @warning for non-DAFS, the vnode is _not_ hashed on the vnode hash table; + * non-DAFS must hash the vnode itself after loading data + * * @return vnode object pointer */ Vnode * -VGetFreeVnode_r(struct VnodeClassInfo * vcp) +VGetFreeVnode_r(struct VnodeClassInfo * vcp, struct Volume *vp, + VnodeId vnodeNumber) { Vnode *vnp; @@ -472,6 +482,16 @@ VGetFreeVnode_r(struct VnodeClassInfo * vcp) DeleteFromVVnList(vnp); } + /* we must re-hash the vnp _before_ we drop the glock again; otherwise, + * someone else might try to grab the same vnode id, and we'll both alloc + * a vnode object for the same vn id, bypassing vnode locking */ + Vn_id(vnp) = vnodeNumber; + VnCreateReservation_r(vnp); + AddToVVnList(vp, vnp); +#ifdef AFS_DEMAND_ATTACH_FS + AddToVnHash(vnp); +#endif + /* drop the file descriptor */ if (vnp->handle) { #ifdef AFS_DEMAND_ATTACH_FS @@ -706,16 +726,7 @@ VAllocVnode_r(Error * ec, Volume * vp, VnodeType type) } else { /* no such vnode in the cache */ - vnp = VGetFreeVnode_r(vcp); - - /* Initialize the header fields so noone allocates another - * vnode with the same number */ - Vn_id(vnp) = vnodeNumber; - VnCreateReservation_r(vnp); - AddToVVnList(vp, vnp); -#ifdef AFS_DEMAND_ATTACH_FS - AddToVnHash(vnp); -#endif + vnp = VGetFreeVnode_r(vcp, vp, vnodeNumber); /* This will never block (guaranteed by check in VGetFreeVnode_r() */ VnLock(vnp, WRITE_LOCK, VOL_LOCK_HELD, WILL_NOT_DEADLOCK); @@ -1209,17 +1220,11 @@ VGetVnode_r(Error * ec, Volume * vp, VnodeId vnodeNumber, int locktype) /* Not in cache; tentatively grab most distantly used one from the LRU * chain */ vcp->reads++; - vnp = VGetFreeVnode_r(vcp); + vnp = VGetFreeVnode_r(vcp, vp, vnodeNumber); /* Initialize */ vnp->changed_newTime = vnp->changed_oldTime = 0; vnp->delete = 0; - Vn_id(vnp) = vnodeNumber; - VnCreateReservation_r(vnp); - AddToVVnList(vp, vnp); -#ifdef AFS_DEMAND_ATTACH_FS - AddToVnHash(vnp); -#endif /* * XXX for non-DAFS, there is a serious diff --git a/src/vol/vnode.h b/src/vol/vnode.h index fb677a7..a374258 100644 --- a/src/vol/vnode.h +++ b/src/vol/vnode.h @@ -274,7 +274,8 @@ extern int VVnodeWriteToRead_r(Error * ec, Vnode * vnp); extern Vnode *VAllocVnode(Error * ec, struct Volume *vp, VnodeType type); extern Vnode *VAllocVnode_r(Error * ec, struct Volume *vp, VnodeType type); /*extern VFreeVnode();*/ -extern Vnode *VGetFreeVnode_r(struct VnodeClassInfo *vcp); +extern Vnode *VGetFreeVnode_r(struct VnodeClassInfo *vcp, struct Volume *vp, + VnodeId vnodeNumber); extern Vnode *VLookupVnode(struct Volume * vp, VnodeId vnodeId); extern void AddToVVnList(struct Volume * vp, Vnode * vnp); -- 1.9.4