From fdf1dfc5f92fcd149a7ae0945e4458993b2ad61e Mon Sep 17 00:00:00 2001 From: Jeffrey Altman Date: Fri, 2 Mar 2012 10:52:35 -0500 Subject: [PATCH] Windows: Avoid deadlock in invalidation path During data version invalidation the AFS redirector must CcPurge any non-dirty extents on a file. This operation can be intercepted by a filter driver which in turn might open the file and close it again before the CcPurge completes. The AFSPerformObjectInvalidate call holds the ExtentsResource shared which can deadlock if AFSClose attempts an extent tear down which requires exclusive access to the ExtentsResource. Change-Id: I7cb0289d8036aabf56bb11fd12a79308be45faa8 Reviewed-on: http://gerrit.openafs.org/6856 Reviewed-by: Derrick Brashear Tested-by: BuildBot Reviewed-by: Jeffrey Altman Tested-by: Jeffrey Altman --- src/WINNT/afsrdr/common/AFSRedirCommonDefines.h | 2 +- src/WINNT/afsrdr/kernel/lib/AFSExtentsSupport.cpp | 100 ++++++++ src/WINNT/afsrdr/kernel/lib/AFSGeneric.cpp | 285 ++++++++++++++++++---- src/WINNT/afsrdr/kernel/lib/Include/AFSCommon.h | 5 + src/WINNT/afsrdr/kernel/lib/Include/AFSStructs.h | 9 + 5 files changed, 350 insertions(+), 51 deletions(-) diff --git a/src/WINNT/afsrdr/common/AFSRedirCommonDefines.h b/src/WINNT/afsrdr/common/AFSRedirCommonDefines.h index ff0f15b..26035a5 100644 --- a/src/WINNT/afsrdr/common/AFSRedirCommonDefines.h +++ b/src/WINNT/afsrdr/common/AFSRedirCommonDefines.h @@ -125,7 +125,7 @@ #define AFS_NETWORK_PROVIDER_11_TAG 'BZFA' #define AFS_AG_ENTRY_CB_TAG 'GAFA' #define AFS_PROCESS_AG_CB_TAG 'APFA' - +#define AFS_BYTERANGE_TAG '_RBA' #define __Enter #define try_return(S) { S; goto try_exit; } diff --git a/src/WINNT/afsrdr/kernel/lib/AFSExtentsSupport.cpp b/src/WINNT/afsrdr/kernel/lib/AFSExtentsSupport.cpp index 94a2795..0ee0643 100644 --- a/src/WINNT/afsrdr/kernel/lib/AFSExtentsSupport.cpp +++ b/src/WINNT/afsrdr/kernel/lib/AFSExtentsSupport.cpp @@ -4102,6 +4102,106 @@ AFSRemoveEntryDirtyList( IN AFSFcb *Fcb, return; } +ULONG +AFSConstructCleanByteRangeList( AFSFcb * pFcb, + AFSByteRange ** pByteRangeList) +{ + + ULONG ulByteRangeMax; + ULONG ulByteRangeCount = 0; + AFSByteRange *ByteRangeList; + AFSExtent *pExtent, *pNextExtent; + + AFSAcquireShared( &pFcb->NPFcb->Specific.File.DirtyExtentsListLock, TRUE); + + ulByteRangeMax = pFcb->Specific.File.ExtentsDirtyCount + 1; + + ByteRangeList = (AFSByteRange *) AFSExAllocatePoolWithTag( PagedPool, + ulByteRangeMax * sizeof( AFSByteRange), + AFS_BYTERANGE_TAG); + + if ( ByteRangeList == NULL) + { + + (*pByteRangeList) = NULL; + + try_return( ulByteRangeCount = DWORD_MAX); + } + + RtlZeroMemory( ByteRangeList, + ulByteRangeMax * sizeof( AFSByteRange)); + + // + // The for loop populates the ByteRangeList entries with values that are + // the gaps in the DirtyList. In other words, if a range is not present + // in the DirtyList it will be represented in the ByteRangeList array. + // + + for ( ulByteRangeCount = 0, + pExtent = (AFSExtent *)pFcb->NPFcb->Specific.File.DirtyListHead; + ulByteRangeCount < ulByteRangeMax && pExtent != NULL; + pExtent = pNextExtent) + { + + pNextExtent = (AFSExtent *)pExtent->DirtyList.fLink; + + // + // The first time the for() is entered the ulByteRangeCount will be zero and + // ByteRangeList[0] FileOffset and Length will both be zero. If the first + // extent is not for offset zero, the ByteRangeList[0] Length is set to the + // FileOffset of the Extent. + // + // Future passes through the loop behave in a similar fashion but + // ByteRangeList[ulByteRangeCount] FileOffset will have been set below. + // + + if ( pExtent->FileOffset.QuadPart != ByteRangeList[ulByteRangeCount].FileOffset.QuadPart + ByteRangeList[ulByteRangeCount].Length.QuadPart) + { + + ByteRangeList[ulByteRangeCount].Length.QuadPart = + pExtent->FileOffset.QuadPart - ByteRangeList[ulByteRangeCount].FileOffset.QuadPart; + + ulByteRangeCount++; + } + + // + // Having processed the current dirty extent, the following while loop + // searches for the next clean gap between dirty extents. + // + + while ( pNextExtent && pNextExtent->FileOffset.QuadPart == pExtent->FileOffset.QuadPart + pExtent->Size) + { + + pExtent = pNextExtent; + + pNextExtent = (AFSExtent *)pExtent->DirtyList.fLink; + } + + // + // Having found the next gap, the ByteRangeList[] FileOffset is set to the start of the gap. + // The Length is left at zero and will be assigned either when the for loop continues or + // when the for loop exits. + // + + ByteRangeList[ulByteRangeCount].FileOffset.QuadPart = pExtent->FileOffset.QuadPart + pExtent->Size; + } + + // + // Assign the Length of the final clean range to match the file length. + // + + ByteRangeList[ulByteRangeCount].Length.QuadPart = + pFcb->ObjectInformation->EndOfFile.QuadPart - ByteRangeList[ulByteRangeCount].FileOffset.QuadPart; + + (*pByteRangeList) = ByteRangeList; + + try_exit: + + AFSReleaseResource( &pFcb->NPFcb->Specific.File.DirtyExtentsListLock); + + return ulByteRangeCount; +} + #if GEN_MD5 void AFSSetupMD5Hash( IN AFSFcb *Fcb, diff --git a/src/WINNT/afsrdr/kernel/lib/AFSGeneric.cpp b/src/WINNT/afsrdr/kernel/lib/AFSGeneric.cpp index c104f8f..2202c57 100644 --- a/src/WINNT/afsrdr/kernel/lib/AFSGeneric.cpp +++ b/src/WINNT/afsrdr/kernel/lib/AFSGeneric.cpp @@ -8571,6 +8571,9 @@ AFSPerformObjectInvalidate( IN AFSObjectInfoCB *ObjectInfo, LARGE_INTEGER liCurrentOffset = {0,0}; LARGE_INTEGER liFlushLength = {0,0}; ULONG ulFlushLength = 0; + BOOLEAN bLocked = FALSE; + BOOLEAN bExtentsLocked = FALSE; + BOOLEAN bCleanExtents = FALSE; if( ObjectInfo->FileType == AFS_FILE_TYPE_FILE && ObjectInfo->Fcb != NULL) @@ -8579,6 +8582,8 @@ AFSPerformObjectInvalidate( IN AFSObjectInfoCB *ObjectInfo, AFSAcquireExcl( &ObjectInfo->Fcb->NPFcb->Resource, TRUE); + bLocked = TRUE; + AFSDbgLogMsg( AFS_SUBSYSTEM_LOCK_PROCESSING, AFS_TRACE_LEVEL_VERBOSE, "AFSPerformObjectInvalidate Acquiring Fcb extents lock %08lX SHARED %08lX\n", @@ -8588,99 +8593,279 @@ AFSPerformObjectInvalidate( IN AFSObjectInfoCB *ObjectInfo, AFSAcquireShared( &ObjectInfo->Fcb->NPFcb->Specific.File.ExtentsResource, TRUE); - __try + bExtentsLocked = TRUE; + + // + // There are several possibilities here: + // + // 0. If there are no extents or all of the extents are dirty, do nothing. + // + // 1. There could be nothing dirty and an open reference count of zero + // in which case we can just tear down all of the extents without + // holding any resources. + // + // 2. There could be nothing dirty and a non-zero open reference count + // in which case we can issue a CcPurge against the entire file + // while holding just the Fcb Resource. + // + // 3. There can be dirty extents in which case we need to identify + // the non-dirty ranges and then perform a CcPurge on just the + // non-dirty ranges while holding just the Fcb Resource. + // + + if ( ObjectInfo->Fcb->Specific.File.ExtentCount != ObjectInfo->Fcb->Specific.File.ExtentsDirtyCount) { - le = ObjectInfo->Fcb->Specific.File.ExtentsLists[AFS_EXTENTS_LIST].Flink; + if ( ObjectInfo->Fcb->Specific.File.ExtentsDirtyCount == 0) + { - ulProcessCount = 0; + if ( ObjectInfo->Fcb->OpenReferenceCount == 0) + { - ulCount = (ULONG)ObjectInfo->Fcb->Specific.File.ExtentCount; + AFSReleaseResource( &ObjectInfo->Fcb->NPFcb->Specific.File.ExtentsResource ); - if( ulCount > 0) - { - pEntry = ExtentFor( le, AFS_EXTENTS_LIST ); + bExtentsLocked = FALSE; + + AFSReleaseResource( &ObjectInfo->Fcb->NPFcb->Resource); + + bLocked = FALSE; - while( ulProcessCount < ulCount) + (VOID) AFSTearDownFcbExtents( ObjectInfo->Fcb, + NULL); + } + else { - pEntry = ExtentFor( le, AFS_EXTENTS_LIST ); - if( !BooleanFlagOn( pEntry->Flags, AFS_EXTENT_DIRTY)) + __try { + + AFSReleaseResource( &ObjectInfo->Fcb->NPFcb->Specific.File.ExtentsResource ); + + bExtentsLocked = FALSE; + if( !CcPurgeCacheSection( &ObjectInfo->Fcb->NPFcb->SectionObjectPointers, - &pEntry->FileOffset, - pEntry->Size, + NULL, + 0, FALSE)) { SetFlag( ObjectInfo->Fcb->Flags, AFS_FCB_FLAG_PURGE_ON_CLOSE); } + else + { + + bCleanExtents = TRUE; + } } + __except( EXCEPTION_EXECUTE_HANDLER) + { - if( liCurrentOffset.QuadPart < pEntry->FileOffset.QuadPart) + ntStatus = GetExceptionCode(); + + AFSDbgLogMsg( 0, + 0, + "EXCEPTION - AFSPerformObjectInvalidate Status %08lX\n", + ntStatus); + } + } + } + else + { + + // + // Must build a list of non-dirty ranges from the beginning of the file + // to the end. There can be at most (Fcb->Specific.File.ExtentsDirtyCount + 1) + // ranges. In all but the most extreme random data write scenario there will + // be significantly fewer. + // + // For each range we need offset and size. + // + + AFSByteRange * ByteRangeList = NULL; + ULONG ulByteRangeCount = 0; + ULONG ulIndex; + BOOLEAN bPurgeOnClose = FALSE; + + __try + { + + ulByteRangeCount = AFSConstructCleanByteRangeList( ObjectInfo->Fcb, + &ByteRangeList); + + if ( ByteRangeList != NULL || + ulByteRangeCount == 0) { - liFlushLength.QuadPart = pEntry->FileOffset.QuadPart - liCurrentOffset.QuadPart; + AFSReleaseResource( &ObjectInfo->Fcb->NPFcb->Specific.File.ExtentsResource ); + + bExtentsLocked = FALSE; - while( liFlushLength.QuadPart > 0) + for ( ulIndex = 0; ulIndex < ulByteRangeCount; ulIndex++) { - if( liFlushLength.QuadPart > 512 * 1024000) - { - ulFlushLength = 512 * 1024000; - } - else + ULONG ulSize; + + do { + + ulSize = ByteRangeList[ulIndex].Length.QuadPart > DWORD_MAX ? DWORD_MAX : ByteRangeList[ulIndex].Length.LowPart; + + if( !CcPurgeCacheSection( &ObjectInfo->Fcb->NPFcb->SectionObjectPointers, + &ByteRangeList[ulIndex].FileOffset, + ulSize, + FALSE)) + { + + bPurgeOnClose = TRUE; + } + else + { + + bCleanExtents = TRUE; + } + + ByteRangeList[ulIndex].Length.QuadPart -= ulSize; + + ByteRangeList[ulIndex].FileOffset.QuadPart += ulSize; + + } while ( ByteRangeList[ulIndex].Length.QuadPart > 0); + } + } + else + { + + // + // We couldn't allocate the memory to build the purge list + // so just walk the extent list while holding the ExtentsList Resource. + // This could deadlock but we do not have much choice. + // + + le = ObjectInfo->Fcb->Specific.File.ExtentsLists[AFS_EXTENTS_LIST].Flink; + + ulProcessCount = 0; + + ulCount = (ULONG)ObjectInfo->Fcb->Specific.File.ExtentCount; + + if( ulCount > 0) + { + pEntry = ExtentFor( le, AFS_EXTENTS_LIST ); + + while( ulProcessCount < ulCount) { - ulFlushLength = liFlushLength.LowPart; + pEntry = ExtentFor( le, AFS_EXTENTS_LIST ); + + if( !BooleanFlagOn( pEntry->Flags, AFS_EXTENT_DIRTY)) + { + if( !CcPurgeCacheSection( &ObjectInfo->Fcb->NPFcb->SectionObjectPointers, + &pEntry->FileOffset, + pEntry->Size, + FALSE)) + { + + bPurgeOnClose = TRUE; + } + else + { + + bCleanExtents = TRUE; + } + } + + if( liCurrentOffset.QuadPart < pEntry->FileOffset.QuadPart) + { + + liFlushLength.QuadPart = pEntry->FileOffset.QuadPart - liCurrentOffset.QuadPart; + + while( liFlushLength.QuadPart > 0) + { + + if( liFlushLength.QuadPart > 512 * 1024000) + { + ulFlushLength = 512 * 1024000; + } + else + { + ulFlushLength = liFlushLength.LowPart; + } + + if( !CcPurgeCacheSection( &ObjectInfo->Fcb->NPFcb->SectionObjectPointers, + &liCurrentOffset, + ulFlushLength, + FALSE)) + { + + bPurgeOnClose = TRUE; + } + else + { + + bCleanExtents = TRUE; + } + + liFlushLength.QuadPart -= ulFlushLength; + } + } + + liCurrentOffset.QuadPart = pEntry->FileOffset.QuadPart + pEntry->Size; + + ulProcessCount++; + le = le->Flink; } - + } + else + { if( !CcPurgeCacheSection( &ObjectInfo->Fcb->NPFcb->SectionObjectPointers, - &liCurrentOffset, - ulFlushLength, + NULL, + 0, FALSE)) { - SetFlag( ObjectInfo->Fcb->Flags, AFS_FCB_FLAG_PURGE_ON_CLOSE); + + bPurgeOnClose = TRUE; } + else + { - liFlushLength.QuadPart -= ulFlushLength; + bCleanExtents = TRUE; + } } - } - liCurrentOffset.QuadPart = pEntry->FileOffset.QuadPart + pEntry->Size; + if ( bPurgeOnClose) + { - ulProcessCount++; - le = le->Flink; + SetFlag( ObjectInfo->Fcb->Flags, AFS_FCB_FLAG_PURGE_ON_CLOSE); + } + } } - } - else - { - if( !CcPurgeCacheSection( &ObjectInfo->Fcb->NPFcb->SectionObjectPointers, - NULL, - 0, - FALSE)) + __except( EXCEPTION_EXECUTE_HANDLER) { - SetFlag( ObjectInfo->Fcb->Flags, AFS_FCB_FLAG_PURGE_ON_CLOSE); + + ntStatus = GetExceptionCode(); + + AFSDbgLogMsg( 0, + 0, + "EXCEPTION - AFSPerformObjectInvalidate Status %08lX\n", + ntStatus); } } } - __except( EXCEPTION_EXECUTE_HANDLER) + + if ( bExtentsLocked) { - ntStatus = GetExceptionCode(); + AFSReleaseResource( &ObjectInfo->Fcb->NPFcb->Specific.File.ExtentsResource ); } - AFSReleaseResource( &ObjectInfo->Fcb->NPFcb->Specific.File.ExtentsResource ); - - AFSReleaseResource( &ObjectInfo->Fcb->NPFcb->Resource); + if ( bLocked) + { - AFSReleaseCleanExtents( ObjectInfo->Fcb, - NULL); - } + AFSReleaseResource( &ObjectInfo->Fcb->NPFcb->Resource); + } - break; - } + if ( bCleanExtents) + { - default: - { + AFSReleaseCleanExtents( ObjectInfo->Fcb, + NULL); + } + } break; } diff --git a/src/WINNT/afsrdr/kernel/lib/Include/AFSCommon.h b/src/WINNT/afsrdr/kernel/lib/Include/AFSCommon.h index 7f28406..fc130b0 100644 --- a/src/WINNT/afsrdr/kernel/lib/Include/AFSCommon.h +++ b/src/WINNT/afsrdr/kernel/lib/Include/AFSCommon.h @@ -47,6 +47,7 @@ extern "C" #include #include // for IoCreateDeviceSecure #include +#include #include "AFSDefines.h" @@ -430,6 +431,10 @@ AFSRemoveEntryDirtyList( IN AFSFcb *Fcb, AFSExtent * ExtentFor( PLIST_ENTRY le, ULONG SkipList ); +ULONG +AFSConstructCleanByteRangeList( AFSFcb * pFcb, + AFSByteRange ** pByteRangeList); + #if GEN_MD5 void AFSSetupMD5Hash( IN AFSFcb *Fcb, diff --git a/src/WINNT/afsrdr/kernel/lib/Include/AFSStructs.h b/src/WINNT/afsrdr/kernel/lib/Include/AFSStructs.h index c410892..f350942 100644 --- a/src/WINNT/afsrdr/kernel/lib/Include/AFSStructs.h +++ b/src/WINNT/afsrdr/kernel/lib/Include/AFSStructs.h @@ -673,4 +673,13 @@ typedef struct _AFS_DIRECTORY_SS_HDR } AFSSnapshotHdr; +typedef struct _AFS_BYTE_RANGE +{ + + LARGE_INTEGER FileOffset; + + LARGE_INTEGER Length; + +} AFSByteRange; + #endif /* _AFS_STRUCTS_H */ -- 1.9.4