From 20dc2832268eb81d40e798da0d424c98cf26062c Mon Sep 17 00:00:00 2001
From: Andrew Deason <adeason@dson.org>
Date: Sun, 24 Nov 2019 22:36:17 -0600
Subject: [PATCH] FBSD: Ignore VI_DOOMED vnodes

Currently on FreeBSD, osi_TryEvictVCache calls vgone() for our vnode
after checking if the given vcache is in use. vgone() then calls our
VOP_RECLAIM operation, which calls afs_vop_reclaim, which calls
afs_FlushVCache to finally actually flush the vcache.

The current approach has at least the following major issues:

- In afs_vop_reclaim, we return success even if afs_FlushVCache()
  fails. This allows FreeBSD to reuse the vnode for another file, but
  the vnode is still being referenced by our vcache, which is
  referenced by the global VLRU and various other structures. This
  causes all kinds of weird errors, since we try to use the underlying
  vnode for different files.

- After the relevant checks in osi_TryEvictVCache are done, another
  thread can acquire a new reference to our vcache (this can happen
  while vgone() is running up until the vnode is locked). This new
  reference will cause afs_FlushVCache to fail.

- Our afs_vop_reclaim callback is called while the vnode is locked,
  and can acquire afs_xvcache. Other code locks the vnode while
  afs_xvcache is already held (such as afs_PutVCache -> vrele). This
  can lead to deadlocks if two threads try to run these codepaths for
  the same vnode at the same time.

- afs_vop_reclaim optionally acquires afs_xvcache based on the return
  value of CheckLock(&afs_xvcache). However, CheckLock just returns if
  that lock is locked by anyone, not if the current thread holds the
  lock. This can result in the rest of the function running without
  afs_xvcache actually being held if we drop AFS_GLOCK at any point.

- osi_TryEvictVCache() tries to vn_lock() the target vnode, but we may
  already have another vnode locked in the current thread. If the
  vnode we're trying to evict is a descendant of a vnode we already
  have locked, this can deadlock.

To fix these issues, make some changes to how our vcache management
works on FreeBSD:

- Do not allow anyone to hold a new reference on a VI_DOOMED vnode.
  We do this by checking for VI_DOOMED in osi_vnhold, and returning an
  error if VI_DOOMED is set.

- In afs_vop_reclaim, panic if afs_FlushVCache fails. With the new
  VI_DOOMED check, afs_FlushVCache show now never fail; and if it
  somehow does, panic'ing immediately is better than corrupting
  various structures and panic'ing later on.

- Move around some of the relevant locking in afs_vop_reclaim to fix
  the lock-related issues.

- In osi_TryEvictVCache, don't wait for the vnode lock (LK_NOWAIT);
  treat the vnode as "in use" if we can't immediately obtain the lock.

Thanks to tcreech@tcreech.com and kaduk@mit.edu for insight and help
investigating the relevant issues.

FIXES 135041

Change-Id: I23e94ecebbddc8c68a8f4ea918d64efd0f9f9dfd
Reviewed-on: https://gerrit.openafs.org/13972
Tested-by: BuildBot <buildbot@rampaginggeek.com>
Reviewed-by: Benjamin Kaduk <kaduk@mit.edu>
---
 src/afs/FBSD/osi_vcache.c   | 42 +++++++++++++++++++++++++++++++-----------
 src/afs/FBSD/osi_vm.c       |  3 +--
 src/afs/FBSD/osi_vnodeops.c | 28 ++++++++++++++++++++--------
 3 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/src/afs/FBSD/osi_vcache.c b/src/afs/FBSD/osi_vcache.c
index 707bcd3..922d8c1 100644
--- a/src/afs/FBSD/osi_vcache.c
+++ b/src/afs/FBSD/osi_vcache.c
@@ -18,39 +18,50 @@ osi_TryEvictVCache(struct vcache *avc, int *slept, int defersleep)
 {
     struct vnode *vp;
     int code;
+    int evicted = 0;
 
     vp = AFSTOV(avc);
 
     if (!VI_TRYLOCK(vp))
-	return 0;
+	return evicted;
     code = osi_fbsd_checkinuse(avc);
     if (code != 0) {
 	VI_UNLOCK(vp);
-	return 0;
+	return evicted;
     }
 
     if ((vp->v_iflag & VI_DOOMED) != 0) {
 	VI_UNLOCK(vp);
-	return 1;
+	evicted = 1;
+	return evicted;
     }
 
-    /* must hold the vnode before calling vgone()
-     * This code largely copied from vfs_subr.c:vlrureclaim() */
     vholdl(vp);
 
     ReleaseWriteLock(&afs_xvcache);
     AFS_GUNLOCK();
 
     *slept = 1;
-    /* use the interlock while locking, so no one else can DOOM this */
-    vn_lock(vp, LK_INTERLOCK|LK_EXCLUSIVE|LK_RETRY);
-    vgone(vp);
-    VOP_UNLOCK(vp, 0);
+
+    if (vn_lock(vp, LK_INTERLOCK|LK_EXCLUSIVE|LK_NOWAIT) == 0) {
+	/*
+	 * vrecycle() will vgone() only if its usecount is 0. If someone grabbed a
+	 * new usecount ref just now, the vgone() will be skipped, and vrecycle
+	 * will return 0.
+	 */
+	if (vrecycle(vp) != 0) {
+	    evicted = 1;
+	}
+
+	VOP_UNLOCK(vp, 0);
+    }
+
     vdrop(vp);
 
     AFS_GLOCK();
     ObtainWriteLock(&afs_xvcache, 340);
-    return 1;
+
+    return evicted;
 }
 
 struct vcache *
@@ -115,6 +126,15 @@ osi_PostPopulateVCache(struct vcache *avc)
 int
 osi_vnhold(struct vcache *avc)
 {
-    vref(AFSTOV(avc));
+    struct vnode *vp = AFSTOV(avc);
+
+    VI_LOCK(vp);
+    if ((vp->v_iflag & VI_DOOMED) != 0) {
+	VI_UNLOCK(vp);
+	return ENOENT;
+    }
+
+    vrefl(AFSTOV(avc));
+    VI_UNLOCK(vp);
     return 0;
 }
diff --git a/src/afs/FBSD/osi_vm.c b/src/afs/FBSD/osi_vm.c
index 49bbe98..780ae12 100644
--- a/src/afs/FBSD/osi_vm.c
+++ b/src/afs/FBSD/osi_vm.c
@@ -79,8 +79,7 @@ osi_VM_FlushVCache(struct vcache *avc)
 
     vp = AFSTOV(avc);
 
-    if (!VI_TRYLOCK(vp))
-	return EBUSY;
+    VI_LOCK(vp);
     code = osi_fbsd_checkinuse(avc);
     if (code) {
 	VI_UNLOCK(vp);
diff --git a/src/afs/FBSD/osi_vnodeops.c b/src/afs/FBSD/osi_vnodeops.c
index 9f3cc37..828a0a4 100644
--- a/src/afs/FBSD/osi_vnodeops.c
+++ b/src/afs/FBSD/osi_vnodeops.c
@@ -1128,18 +1128,31 @@ afs_vop_inactive(ap)
 static int
 afs_vop_reclaim(struct vop_reclaim_args *ap)
 {
-    /* copied from ../OBSD/osi_vnodeops.c:afs_nbsd_reclaim() */
     int code, slept;
     struct vnode *vp = ap->a_vp;
     struct vcache *avc = VTOAFS(vp);
     int haveGlock = ISAFS_GLOCK();
-    int haveVlock = CheckLock(&afs_xvcache);
+
+    /*
+     * In other code paths, we acquire the vnode lock while afs_xvcache is
+     * already held (e.g. afs_PutVCache() -> vrele()). Here, we already have
+     * the vnode lock, and we need afs_xvcache. So drop the vnode lock in order
+     * to hold afs_xvcache.
+     */
+    VOP_UNLOCK(vp, 0);
 
     if (!haveGlock)
 	AFS_GLOCK();
-    if (!haveVlock)
-	ObtainWriteLock(&afs_xvcache, 901);
-    /* reclaim the vnode and the in-memory vcache, but keep the on-disk vcache */
+    ObtainWriteLock(&afs_xvcache, 901);
+
+    /*
+     * Note that we deliberately call VOP_LOCK() instead of vn_lock() here.
+     * vn_lock() will return an error for VI_DOOMED vnodes, but we know this
+     * vnode is already VI_DOOMED. We just want to lock it again, and skip the
+     * VI_DOOMED check.
+     */
+    VOP_LOCK(vp, LK_EXCLUSIVE);
+
     code = afs_FlushVCache(avc, &slept);
 
     if (avc->f.states & CVInit) {
@@ -1147,17 +1160,16 @@ afs_vop_reclaim(struct vop_reclaim_args *ap)
 	afs_osi_Wakeup(&avc->f.states);
     }
 
-    if (!haveVlock)
-	ReleaseWriteLock(&afs_xvcache);
+    ReleaseWriteLock(&afs_xvcache);
     if (!haveGlock)
 	AFS_GUNLOCK();
 
     if (code) {
 	afs_warn("afs_vop_reclaim: afs_FlushVCache failed code %d vnode\n", code);
 	VOP_PRINT(vp);
+	panic("afs: afs_FlushVCache failed during reclaim");
     }
 
-    /* basically, it must not fail */
     vnode_destroy_vobject(vp);
     vp->v_data = 0;
 
-- 
1.9.4