From 20dc2832268eb81d40e798da0d424c98cf26062c Mon Sep 17 00:00:00 2001 From: Andrew Deason Date: Sun, 24 Nov 2019 22:36:17 -0600 Subject: [PATCH] FBSD: Ignore VI_DOOMED vnodes Currently on FreeBSD, osi_TryEvictVCache calls vgone() for our vnode after checking if the given vcache is in use. vgone() then calls our VOP_RECLAIM operation, which calls afs_vop_reclaim, which calls afs_FlushVCache to finally actually flush the vcache. The current approach has at least the following major issues: - In afs_vop_reclaim, we return success even if afs_FlushVCache() fails. This allows FreeBSD to reuse the vnode for another file, but the vnode is still being referenced by our vcache, which is referenced by the global VLRU and various other structures. This causes all kinds of weird errors, since we try to use the underlying vnode for different files. - After the relevant checks in osi_TryEvictVCache are done, another thread can acquire a new reference to our vcache (this can happen while vgone() is running up until the vnode is locked). This new reference will cause afs_FlushVCache to fail. - Our afs_vop_reclaim callback is called while the vnode is locked, and can acquire afs_xvcache. Other code locks the vnode while afs_xvcache is already held (such as afs_PutVCache -> vrele). This can lead to deadlocks if two threads try to run these codepaths for the same vnode at the same time. - afs_vop_reclaim optionally acquires afs_xvcache based on the return value of CheckLock(&afs_xvcache). However, CheckLock just returns if that lock is locked by anyone, not if the current thread holds the lock. This can result in the rest of the function running without afs_xvcache actually being held if we drop AFS_GLOCK at any point. - osi_TryEvictVCache() tries to vn_lock() the target vnode, but we may already have another vnode locked in the current thread. If the vnode we're trying to evict is a descendant of a vnode we already have locked, this can deadlock. To fix these issues, make some changes to how our vcache management works on FreeBSD: - Do not allow anyone to hold a new reference on a VI_DOOMED vnode. We do this by checking for VI_DOOMED in osi_vnhold, and returning an error if VI_DOOMED is set. - In afs_vop_reclaim, panic if afs_FlushVCache fails. With the new VI_DOOMED check, afs_FlushVCache show now never fail; and if it somehow does, panic'ing immediately is better than corrupting various structures and panic'ing later on. - Move around some of the relevant locking in afs_vop_reclaim to fix the lock-related issues. - In osi_TryEvictVCache, don't wait for the vnode lock (LK_NOWAIT); treat the vnode as "in use" if we can't immediately obtain the lock. Thanks to tcreech@tcreech.com and kaduk@mit.edu for insight and help investigating the relevant issues. FIXES 135041 Change-Id: I23e94ecebbddc8c68a8f4ea918d64efd0f9f9dfd Reviewed-on: https://gerrit.openafs.org/13972 Tested-by: BuildBot Reviewed-by: Benjamin Kaduk --- src/afs/FBSD/osi_vcache.c | 42 +++++++++++++++++++++++++++++++----------- src/afs/FBSD/osi_vm.c | 3 +-- src/afs/FBSD/osi_vnodeops.c | 28 ++++++++++++++++++++-------- 3 files changed, 52 insertions(+), 21 deletions(-) diff --git a/src/afs/FBSD/osi_vcache.c b/src/afs/FBSD/osi_vcache.c index 707bcd3..922d8c1 100644 --- a/src/afs/FBSD/osi_vcache.c +++ b/src/afs/FBSD/osi_vcache.c @@ -18,39 +18,50 @@ osi_TryEvictVCache(struct vcache *avc, int *slept, int defersleep) { struct vnode *vp; int code; + int evicted = 0; vp = AFSTOV(avc); if (!VI_TRYLOCK(vp)) - return 0; + return evicted; code = osi_fbsd_checkinuse(avc); if (code != 0) { VI_UNLOCK(vp); - return 0; + return evicted; } if ((vp->v_iflag & VI_DOOMED) != 0) { VI_UNLOCK(vp); - return 1; + evicted = 1; + return evicted; } - /* must hold the vnode before calling vgone() - * This code largely copied from vfs_subr.c:vlrureclaim() */ vholdl(vp); ReleaseWriteLock(&afs_xvcache); AFS_GUNLOCK(); *slept = 1; - /* use the interlock while locking, so no one else can DOOM this */ - vn_lock(vp, LK_INTERLOCK|LK_EXCLUSIVE|LK_RETRY); - vgone(vp); - VOP_UNLOCK(vp, 0); + + if (vn_lock(vp, LK_INTERLOCK|LK_EXCLUSIVE|LK_NOWAIT) == 0) { + /* + * vrecycle() will vgone() only if its usecount is 0. If someone grabbed a + * new usecount ref just now, the vgone() will be skipped, and vrecycle + * will return 0. + */ + if (vrecycle(vp) != 0) { + evicted = 1; + } + + VOP_UNLOCK(vp, 0); + } + vdrop(vp); AFS_GLOCK(); ObtainWriteLock(&afs_xvcache, 340); - return 1; + + return evicted; } struct vcache * @@ -115,6 +126,15 @@ osi_PostPopulateVCache(struct vcache *avc) int osi_vnhold(struct vcache *avc) { - vref(AFSTOV(avc)); + struct vnode *vp = AFSTOV(avc); + + VI_LOCK(vp); + if ((vp->v_iflag & VI_DOOMED) != 0) { + VI_UNLOCK(vp); + return ENOENT; + } + + vrefl(AFSTOV(avc)); + VI_UNLOCK(vp); return 0; } diff --git a/src/afs/FBSD/osi_vm.c b/src/afs/FBSD/osi_vm.c index 49bbe98..780ae12 100644 --- a/src/afs/FBSD/osi_vm.c +++ b/src/afs/FBSD/osi_vm.c @@ -79,8 +79,7 @@ osi_VM_FlushVCache(struct vcache *avc) vp = AFSTOV(avc); - if (!VI_TRYLOCK(vp)) - return EBUSY; + VI_LOCK(vp); code = osi_fbsd_checkinuse(avc); if (code) { VI_UNLOCK(vp); diff --git a/src/afs/FBSD/osi_vnodeops.c b/src/afs/FBSD/osi_vnodeops.c index 9f3cc37..828a0a4 100644 --- a/src/afs/FBSD/osi_vnodeops.c +++ b/src/afs/FBSD/osi_vnodeops.c @@ -1128,18 +1128,31 @@ afs_vop_inactive(ap) static int afs_vop_reclaim(struct vop_reclaim_args *ap) { - /* copied from ../OBSD/osi_vnodeops.c:afs_nbsd_reclaim() */ int code, slept; struct vnode *vp = ap->a_vp; struct vcache *avc = VTOAFS(vp); int haveGlock = ISAFS_GLOCK(); - int haveVlock = CheckLock(&afs_xvcache); + + /* + * In other code paths, we acquire the vnode lock while afs_xvcache is + * already held (e.g. afs_PutVCache() -> vrele()). Here, we already have + * the vnode lock, and we need afs_xvcache. So drop the vnode lock in order + * to hold afs_xvcache. + */ + VOP_UNLOCK(vp, 0); if (!haveGlock) AFS_GLOCK(); - if (!haveVlock) - ObtainWriteLock(&afs_xvcache, 901); - /* reclaim the vnode and the in-memory vcache, but keep the on-disk vcache */ + ObtainWriteLock(&afs_xvcache, 901); + + /* + * Note that we deliberately call VOP_LOCK() instead of vn_lock() here. + * vn_lock() will return an error for VI_DOOMED vnodes, but we know this + * vnode is already VI_DOOMED. We just want to lock it again, and skip the + * VI_DOOMED check. + */ + VOP_LOCK(vp, LK_EXCLUSIVE); + code = afs_FlushVCache(avc, &slept); if (avc->f.states & CVInit) { @@ -1147,17 +1160,16 @@ afs_vop_reclaim(struct vop_reclaim_args *ap) afs_osi_Wakeup(&avc->f.states); } - if (!haveVlock) - ReleaseWriteLock(&afs_xvcache); + ReleaseWriteLock(&afs_xvcache); if (!haveGlock) AFS_GUNLOCK(); if (code) { afs_warn("afs_vop_reclaim: afs_FlushVCache failed code %d vnode\n", code); VOP_PRINT(vp); + panic("afs: afs_FlushVCache failed during reclaim"); } - /* basically, it must not fail */ vnode_destroy_vobject(vp); vp->v_data = 0; -- 1.9.4