/*
* Copyright 2000, International Business Machines Corporation and others.
- *$All Rights Reserved.
+ * All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
#include "afs/afs_cbqueue.h"
#include "afs/afs_osidnlc.h"
+#include <opr/ffs.h>
+
/* Forward declarations. */
static void afs_GetDownD(int anumber, int *aneedSpace, afs_int32 buckethint);
-static void afs_FreeDiscardedDCache(void);
+static int afs_FreeDiscardedDCache(void);
static void afs_DiscardDCache(struct dcache *);
static void afs_FreeDCache(struct dcache *);
/* For split cache */
int cacheDiskType; /*Type of backing disk for cache */
struct afs_cacheOps *afs_cacheType;
+
+/*
+ * The PFlush algorithm makes use of the fact that Fid.Unique is not used in
+ * the hash algorithms below. Change it if need be so that the flushing
+ * algorithm doesn't move things from one hash chain to another.
+ */
+/*
+ * (Volume, Vnode, Chunk) -> hash table index.
+ *
+ * Packs the fid's volume number, its vnode number and the chunk number into
+ * a three-word buffer, hashes that buffer with opr_jhash(), and masks the
+ * result with (afs_dhashsize - 1) to produce the returned index.
+ * Fid.Unique and Fid.Cell are not part of the hashed input.
+ *
+ * NOTE(review): masking with (afs_dhashsize - 1) only covers the whole
+ * table evenly if afs_dhashsize is a power of two -- confirm wherever
+ * afs_dhashsize is set.
+ */
+int DCHash(struct VenusFid *fid, afs_int32 chunk)
+{
+ afs_uint32 buf[3];
+
+ buf[0] = fid->Fid.Volume;
+ buf[1] = fid->Fid.Vnode;
+ buf[2] = chunk;
+ return opr_jhash(buf, 3, 0) & (afs_dhashsize - 1);
+}
+/*
+ * (Volume, Vnode) -> index for the other hash table.
+ *
+ * Hashes the fid's volume and vnode numbers with opr_jhash_int2() and masks
+ * the result with (afs_dhashsize - 1). No chunk number is involved, so all
+ * chunks of the same volume/vnode pair get the same index. Fid.Unique and
+ * Fid.Cell are not part of the hashed input.
+ *
+ * NOTE(review): presumably this indexes the per-vnode table
+ * (afs_dvhashTbl) -- confirm against the callers.
+ */
+int DVHash(struct VenusFid *fid)
+{
+ return opr_jhash_int2(fid->Fid.Volume, fid->Fid.Vnode, 0) &
+ (afs_dhashsize - 1);
+}
+
/*!
* Where is this vcache's entry associated dcache located/
* \param avc The vcache entry.
u_int afs_min_cache = 0;
/*!
+ * If there are waiters for the cache to drain (afs_WaitForCacheDrain is
+ * set), wake them once the number of blocks in use, not counting discarded
+ * blocks (afs_blocksUsed - afs_blocksDiscarded), is at or below
+ * PERCENT(CM_CACHESIZEDRAINEDPCT, afs_cacheBlocks).
+ *
+ * When the condition holds, afs_WaitForCacheDrain is cleared before
+ * afs_osi_Wakeup() is called on its address. If nobody is waiting, or the
+ * cache has not drained far enough yet, this routine does nothing.
+ *
+ * \note Environment:
+ * This routine must be called with the afs_xdcache lock held
+ * (in write mode).
+ */
+static void
+afs_WakeCacheWaitersIfDrained(void)
+{
+ if (afs_WaitForCacheDrain) {
+ if ((afs_blocksUsed - afs_blocksDiscarded) <=
+ PERCENT(CM_CACHESIZEDRAINEDPCT, afs_cacheBlocks)) {
+ afs_WaitForCacheDrain = 0; /* clear the flag before waking anyone */
+ afs_osi_Wakeup(&afs_WaitForCacheDrain); /* the flag's address is the wakeup event */
+ }
+ }
+}
+
+/*!
* Keeps the cache clean and free by truncating uneeded files, when used.
* \param
* \return
while (1) {
cb_lowat = PERCENT((CM_DCACHESPACEFREEPCT - CM_DCACHEEXTRAPCT), afs_cacheBlocks);
ObtainWriteLock(&afs_xdcache, 266);
- if (afs_CacheTooFull) {
+ if (afs_CacheTooFull || afs_WaitForCacheDrain) {
int space_needed, slots_needed;
/* if we get woken up, we should try to clean something out */
for (counter = 0; counter < 10; counter++) {
if (slots_needed || space_needed)
afs_GetDownD(slots_needed, &space_needed, 0);
if ((space_needed <= 0) && (slots_needed <= 0)) {
- afs_CacheTooFull = 0;
break;
}
if (afs_termState == AFSOP_STOP_TRUNCDAEMON)
break;
}
- if (!afs_CacheIsTooFull())
+ if (!afs_CacheIsTooFull()) {
afs_CacheTooFull = 0;
+ afs_WakeCacheWaitersIfDrained();
+ }
} /* end of cache cleanup */
ReleaseWriteLock(&afs_xdcache);
*/
while (afs_blocksDiscarded && !afs_WaitForCacheDrain
&& (afs_termState != AFSOP_STOP_TRUNCDAEMON)) {
- afs_FreeDiscardedDCache();
+ int code = afs_FreeDiscardedDCache();
+ if (code) {
+ /* If we can't free any discarded dcache entries, that's okay.
+ * We're just doing this in the background; if someone needs
+ * discarded entries freed, they will try it themselves and/or
+ * signal us that the cache is too full. In any case, we'll
+ * try doing this again the next time we run through the loop.
+ */
+ break;
+ }
}
/* See if we need to continue to run. Someone may have
AFS_STATCNT(afs_AdjustSize);
+ if (newSize > afs_OtherCSize && !(adc->f.fid.Fid.Vnode & 1)) {
+ /* No non-dir cache files should be larger than the chunk size.
+ * (Directory blobs are fetched in a single chunk file, so directories
+ * can be larger.) If someone is requesting that a chunk is larger than
+ * the chunk size, something strange is happening. Log a message about
+ * it, to give a hint to subsequent strange behavior, if any occurs. */
+ static int warned;
+ if (!warned) {
+ warned = 1;
+ afs_warn("afs: Warning: dcache %d is very large (%d > %d). This "
+ "should not happen, but trying to continue regardless. If "
+ "AFS starts hanging or behaving strangely, this might be "
+ "why.\n",
+ adc->index, newSize, afs_OtherCSize);
+ }
+ }
+
adc->dflags |= DFEntryMod;
oldSize = ((adc->f.chunkBytes + afs_fsfragsize) ^ afs_fsfragsize) >> 10; /* round up */
adc->f.chunkBytes = newSize;
} else {
afs_FreeDCache(adc);
}
-
- if (afs_WaitForCacheDrain) {
- if (afs_blocksUsed <=
- PERCENT(CM_CACHESIZEDRAINEDPCT, afs_cacheBlocks)) {
- afs_WaitForCacheDrain = 0;
- afs_osi_Wakeup(&afs_WaitForCacheDrain);
- }
- }
} /*afs_FlushDCache */
afs_indexFlags[adc->index] |= IFFree;
adc->dflags |= DFEntryMod;
- if (afs_WaitForCacheDrain) {
- if ((afs_blocksUsed - afs_blocksDiscarded) <=
- PERCENT(CM_CACHESIZEDRAINEDPCT, afs_cacheBlocks)) {
- afs_WaitForCacheDrain = 0;
- afs_osi_Wakeup(&afs_WaitForCacheDrain);
- }
- }
+ afs_WakeCacheWaitersIfDrained();
} /* afs_FreeDCache */
/*!
adc->dflags |= DFEntryMod;
afs_indexFlags[adc->index] |= IFDiscarded;
- if (afs_WaitForCacheDrain) {
- if ((afs_blocksUsed - afs_blocksDiscarded) <=
- PERCENT(CM_CACHESIZEDRAINEDPCT, afs_cacheBlocks)) {
- afs_WaitForCacheDrain = 0;
- afs_osi_Wakeup(&afs_WaitForCacheDrain);
- }
+ afs_WakeCacheWaitersIfDrained();
+} /*afs_DiscardDCache */
+
+/**
+ * Get a dcache entry from the discard or free list
+ *
+ * Takes the entry at the head of the given list and unlinks it: the list
+ * head is advanced to the entry's successor in afs_dvnextTbl, and the
+ * entry's own afs_dvnextTbl link is reset to NULLIDX. This routine does not
+ * touch any list counters or afs_indexFlags; callers do that themselves.
+ *
+ * @param[out] adc On success, a dcache from the given list. Otherwise, NULL.
+ * @param[in,out] indexp A pointer to the head of the dcache free list or
+ * discard list (afs_freeDCList, or afs_discardDCList).
+ * On success it is updated to the next entry in the
+ * list; on failure it is left unchanged.
+ *
+ * @return 0 on success. If there are no dcache slots available, return ENOSPC.
+ * If we encountered an error in disk i/o while trying to find a
+ * dcache, return EIO.
+ *
+ * @pre afs_xdcache is write-locked
+ */
+static int
+afs_GetDSlotFromList(struct dcache **adc, afs_int32 *indexp)
+{
+ struct dcache *tdc;
+
+ *adc = NULL;
+
+ if (*indexp == NULLIDX) {
+ return ENOSPC; /* the list is empty */
}
-} /*afs_DiscardDCache */
+ tdc = afs_GetUnusedDSlot(*indexp);
+ if (tdc == NULL) {
+ return EIO; /* could not get the slot at the head of the list */
+ }
+
+ osi_Assert(tdc->refCount == 1); /* we must hold the only reference */
+ ReleaseReadLock(&tdc->tlock); /* NOTE(review): presumably afs_GetUnusedDSlot() returns with tlock read-held -- confirm */
+ *indexp = afs_dvnextTbl[tdc->index]; /* advance the list head past tdc */
+ afs_dvnextTbl[tdc->index] = NULLIDX; /* tdc is no longer linked into the list */
+
+ *adc = tdc;
+ return 0;
+}
/*!
* Free the next element on the list of discarded cache elements.
+ *
+ * Returns -1 if we encountered an error preventing us from freeing a
+ * discarded dcache, or 0 on success.
*/
-static void
+static int
afs_FreeDiscardedDCache(void)
{
struct dcache *tdc;
ObtainWriteLock(&afs_xdcache, 510);
if (!afs_blocksDiscarded) {
ReleaseWriteLock(&afs_xdcache);
- return;
+ return 0;
}
/*
* Get an entry from the list of discarded cache elements
*/
- tdc = afs_GetNewDSlot(afs_discardDCList);
- osi_Assert(tdc->refCount == 1);
- ReleaseReadLock(&tdc->tlock);
+ (void)afs_GetDSlotFromList(&tdc, &afs_discardDCList);
+ if (!tdc) {
+ ReleaseWriteLock(&afs_xdcache);
+ return -1;
+ }
- afs_discardDCList = afs_dvnextTbl[tdc->index];
- afs_dvnextTbl[tdc->index] = NULLIDX;
afs_discardDCCount--;
size = ((tdc->f.chunkBytes + afs_fsfragsize) ^ afs_fsfragsize) >> 10; /* round up */
afs_blocksDiscarded -= size;
* Truncate the element to reclaim its space
*/
tfile = afs_CFileOpen(&tdc->f.inode);
+ osi_Assert(tfile);
afs_CFileTruncate(tfile, 0);
afs_CFileClose(tfile);
afs_AdjustSize(tdc, 0);
ReleaseWriteLock(&tdc->lock);
afs_PutDCache(tdc);
ReleaseWriteLock(&afs_xdcache);
+
+ return 0;
}
/*!
while (afs_blocksDiscarded
&& (afs_blocksUsed >
PERCENT(CM_WAITFORDRAINPCT, afs_cacheBlocks))) {
- afs_FreeDiscardedDCache();
+ int code = afs_FreeDiscardedDCache();
+ if (code) {
+ /* Callers depend on us to get the afs_blocksDiscarded count down.
+ * If we cannot do that, the callers can spin by calling us over
+ * and over. Panic for now until we can figure out something
+ * better. */
+ osi_Panic("Error freeing discarded dcache");
+ }
}
return 0;
}
if (tdc->refCount == 0) {
if ((ix = tdc->index) == NULLIDX)
osi_Panic("getdowndslot");
- /* pull the entry out of the lruq and put it on the free list */
- QRemove(&tdc->lruq);
/* write-through if modified */
if (tdc->dflags & DFEntryMod) {
AFS_GLOCK();
}
#else
+ int code;
+
+ code = afs_WriteDCache(tdc, 1);
+ if (code) {
+ /*
+ * We couldn't flush it at this time; return early because
+ * if afs_WriteDCache() failed once it is likely to
+ * continue failing for subsequent dcaches.
+ */
+ return;
+ }
tdc->dflags &= ~DFEntryMod;
- osi_Assert(afs_WriteDCache(tdc, 1) == 0);
#endif
}
- /* finally put the entry in the free list */
+ /* pull the entry out of the lruq and put it on the free list */
+ QRemove(&tdc->lruq);
afs_indexTable[ix] = NULL;
afs_indexFlags[ix] &= ~IFEverUsed;
tdc->index = NULLIDX;
if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
int releaseTlock = 1;
tdc = afs_GetValidDSlot(index);
- if (!tdc) osi_Panic("afs_TryToSmush tdc");
+ if (!tdc) {
+ /* afs_TryToSmush is best-effort; we may not actually discard
+ * everything, so failure to discard dcaches due to an i/o
+ * error is okay. */
+ break;
+ }
if (!FidCmp(&tdc->f.fid, &avc->f.fid)) {
if (sync) {
if ((afs_indexFlags[index] & IFDataMod) == 0
i = afs_dvnextTbl[index];
if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
tdc = afs_GetValidDSlot(index);
- if (tdc) {
- if (!FidCmp(&tdc->f.fid, &avc->f.fid)) {
- totalChunks--;
- }
- ReleaseReadLock(&tdc->tlock);
- afs_PutDCache(tdc);
- }
+ if (!tdc) {
+ break;
+ }
+ if (!FidCmp(&tdc->f.fid, &avc->f.fid)) {
+ totalChunks--;
+ }
+ ReleaseReadLock(&tdc->tlock);
+ afs_PutDCache(tdc);
}
}
ReleaseWriteLock(&afs_xdcache);
*/
i = DCHash(&avc->f.fid, chunk);
ObtainWriteLock(&afs_xdcache, 278);
- for (index = afs_dchashTbl[i]; index != NULLIDX;) {
+ for (index = afs_dchashTbl[i]; index != NULLIDX; index = afs_dcnextTbl[index]) {
if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
tdc = afs_GetValidDSlot(index);
- if (!tdc) osi_Panic("afs_FindDCache tdc");
+ if (!tdc) {
+ /* afs_FindDCache is best-effort; we may not find the given
+ * file/offset, so if we cannot find the given dcache due to
+ * i/o errors, that is okay. */
+ index = NULLIDX;
+ break;
+ }
ReleaseReadLock(&tdc->tlock);
if (!FidCmp(&tdc->f.fid, &avc->f.fid) && chunk == tdc->f.chunk) {
break; /* leaving refCount high for caller */
}
afs_PutDCache(tdc);
}
- index = afs_dcnextTbl[index];
}
if (index != NULLIDX) {
hset(afs_indexTimes[tdc->index], afs_indexCounter);
return NULL;
} /*afs_FindDCache */
+/*
+ * afs_AllocFreeDSlot() and afs_AllocDiscardDSlot(): only call these from
+ * afs_AllocDCache().
+ *
+ * afs_AllocFreeDSlot() takes the dcache at the head of afs_freeDCList,
+ * clears its IFFree index flag, write-locks it, and decrements
+ * afs_freeDCCount.
+ *
+ * Returns 0 with *adc set to the write-locked dcache on success. Otherwise
+ * returns the error code from afs_GetDSlotFromList() and leaves *adc
+ * untouched.
+ */
+static int
+afs_AllocFreeDSlot(struct dcache **adc)
+{
+ int code;
+ struct dcache *tdc;
+
+ code = afs_GetDSlotFromList(&tdc, &afs_freeDCList);
+ if (code) {
+ return code;
+ }
+ afs_indexFlags[tdc->index] &= ~IFFree; /* the slot is no longer on the free list */
+ ObtainWriteLock(&tdc->lock, 604); /* handed back to the caller still write-locked */
+ afs_freeDCCount--;
+
+ *adc = tdc;
+ return 0;
+}
+static int
+afs_AllocDiscardDSlot(struct dcache **adc, afs_int32 lock)
+{
+ int code;
+ struct dcache *tdc;
+ afs_uint32 size = 0;
+ struct osi_file *file;
+ /*
+ * Take the dcache at the head of afs_discardDCList and undo its
+ * "discarded" accounting: index flag, entry count, discarded block
+ * count, and bucket. If (lock & 2) is set, the backing cache file is
+ * also truncated to zero length.
+ *
+ * On failure, return the error code from afs_GetDSlotFromList() without
+ * touching *adc. On success, return 0 with *adc set to the dcache, which
+ * is left write-locked.
+ */
+ code = afs_GetDSlotFromList(&tdc, &afs_discardDCList);
+ if (code) {
+ return code;
+ }
+ afs_indexFlags[tdc->index] &= ~IFDiscarded; /* the slot is no longer on the discard list */
+ ObtainWriteLock(&tdc->lock, 605); /* handed back to the caller still write-locked */
+ afs_discardDCCount--;
+ /* NOTE(review): presumably chunkBytes rounded up to the filesystem
+ * fragment size; the shift by 10 converts bytes to 1K blocks -- confirm */
+ size =
+ ((tdc->f.chunkBytes +
+ afs_fsfragsize) ^ afs_fsfragsize) >> 10;
+ tdc->f.states &= ~(DRO|DBackup|DRW); /* clear the DRO/DBackup/DRW state bits */
+ afs_DCMoveBucket(tdc, size, 0);
+ afs_blocksDiscarded -= size; /* these blocks no longer count as discarded */
+ afs_stats_cmperf.cacheBlocksDiscarded = afs_blocksDiscarded; /* keep the stats copy in sync */
+ if ((lock & 2)) {
+ /* Truncate the chunk so zeroes get filled properly */
+ file = afs_CFileOpen(&tdc->f.inode);
+ osi_Assert(file); /* failure to open the cache file is treated as fatal here */
+ afs_CFileTruncate(file, 0);
+ afs_CFileClose(file);
+ afs_AdjustSize(tdc, 0);
+ }
+
+ *adc = tdc;
+ return 0;
+}
/*!
* Get a fresh dcache from the free or discarded list.
*
+ * \param adc Set to the new dcache on success, and NULL on error.
* \param avc Who's dcache is this going to be?
* \param chunk The position where it will be placed in.
* \param lock How are locks held.
* - avc (R if (lock & 1) set and W otherwise)
* \note It write locks the new dcache. The caller must unlock it.
*
- * \return The new dcache.
+ * \return If we're out of dslots, ENOSPC. If we encountered disk errors, EIO.
+ * On success, return 0.
*/
-struct dcache *
-afs_AllocDCache(struct vcache *avc, afs_int32 chunk, afs_int32 lock,
- struct VenusFid *ashFid)
+static int
+afs_AllocDCache(struct dcache **adc, struct vcache *avc, afs_int32 chunk,
+ afs_int32 lock, struct VenusFid *ashFid)
{
+ int code;
struct dcache *tdc = NULL;
- afs_uint32 size = 0;
- struct osi_file *file;
- if (afs_discardDCList == NULLIDX
- || ((lock & 2) && afs_freeDCList != NULLIDX)) {
+ *adc = NULL;
- afs_indexFlags[afs_freeDCList] &= ~IFFree;
- tdc = afs_GetNewDSlot(afs_freeDCList);
- osi_Assert(tdc->refCount == 1);
- ReleaseReadLock(&tdc->tlock);
- ObtainWriteLock(&tdc->lock, 604);
- afs_freeDCList = afs_dvnextTbl[tdc->index];
- afs_freeDCCount--;
+ /* if (lock & 2), prefer 'free' dcaches; otherwise, prefer 'discard'
+ * dcaches. In either case, try both if our first choice doesn't work due
+ * to ENOSPC. */
+ if ((lock & 2)) {
+ code = afs_AllocFreeDSlot(&tdc);
+ if (code == ENOSPC) {
+ code = afs_AllocDiscardDSlot(&tdc, lock);
+ }
} else {
- afs_indexFlags[afs_discardDCList] &= ~IFDiscarded;
- tdc = afs_GetNewDSlot(afs_discardDCList);
- osi_Assert(tdc->refCount == 1);
- ReleaseReadLock(&tdc->tlock);
- ObtainWriteLock(&tdc->lock, 605);
- afs_discardDCList = afs_dvnextTbl[tdc->index];
- afs_discardDCCount--;
- size =
- ((tdc->f.chunkBytes +
- afs_fsfragsize) ^ afs_fsfragsize) >> 10;
- tdc->f.states &= ~(DRO|DBackup|DRW);
- afs_DCMoveBucket(tdc, size, 0);
- afs_blocksDiscarded -= size;
- afs_stats_cmperf.cacheBlocksDiscarded = afs_blocksDiscarded;
- if (lock & 2) {
- /* Truncate the chunk so zeroes get filled properly */
- file = afs_CFileOpen(&tdc->f.inode);
- afs_CFileTruncate(file, 0);
- afs_CFileClose(file);
- afs_AdjustSize(tdc, 0);
+ code = afs_AllocDiscardDSlot(&tdc, lock);
+ if (code == ENOSPC) {
+ code = afs_AllocFreeDSlot(&tdc);
}
}
+ if (code) {
+ return code;
+ }
/*
* Locks held:
if (tdc->lruq.prev == &tdc->lruq)
osi_Panic("lruq 1");
- return tdc;
+ *adc = tdc;
+ return 0;
}
/*
afs_int32 index;
afs_int32 us;
afs_int32 chunk;
- afs_size_t maxGoodLength; /* amount of good data at server */
afs_size_t Position = 0;
afs_int32 size, tlen; /* size of segment to transfer */
struct afs_FetchOutput *tsmall = 0;
*/
if (!tdc) { /* If the hint wasn't the right dcache entry */
+ int dslot_error = 0;
/*
* Hash on the [fid, chunk] and get the corresponding dcache index
* after write-locking the dcache.
ObtainWriteLock(&afs_xdcache, 280);
us = NULLIDX;
- for (index = afs_dchashTbl[i]; index != NULLIDX;) {
+ for (index = afs_dchashTbl[i]; index != NULLIDX; us = index, index = afs_dcnextTbl[index]) {
if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
tdc = afs_GetValidDSlot(index);
if (!tdc) {
- ReleaseWriteLock(&afs_xdcache);
- goto done;
+ /* we got an i/o error when trying to get the given dslot.
+ * it's possible the dslot we're looking for is elsewhere,
+ * but most likely the disk cache is currently unusable, so
+ * all afs_GetValidDSlot calls will fail, so just bail out. */
+ dslot_error = 1;
+ index = NULLIDX;
+ break;
}
ReleaseReadLock(&tdc->tlock);
/*
afs_PutDCache(tdc);
tdc = 0;
}
- us = index;
- index = afs_dcnextTbl[index];
}
/*
afs_Trace2(afs_iclSetp, CM_TRACE_GETDCACHE1, ICL_TYPE_POINTER,
avc, ICL_TYPE_INT32, chunk);
- /* Make sure there is a free dcache entry for us to use */
+ if (dslot_error) {
+ /* We couldn't find the dcache we want, but we hit some i/o
+ * errors when trying to find it, so we're not sure if the
+ * dcache we want is in the cache or not. Error out, so we
+ * don't try to possibly create 2 separate dcaches for the
+ * same exact data. */
+ ReleaseWriteLock(&afs_xdcache);
+ goto done;
+ }
+
if (afs_discardDCList == NULLIDX && afs_freeDCList == NULLIDX) {
- while (1) {
- if (!setLocks)
- avc->f.states |= CDCLock;
- /* just need slots */
- afs_GetDownD(5, (int *)0, afs_DCGetBucket(avc));
- if (!setLocks)
- avc->f.states &= ~CDCLock;
- if (afs_discardDCList != NULLIDX
- || afs_freeDCList != NULLIDX)
- break;
- /* If we can't get space for 5 mins we give up and panic */
- if (++downDCount > 300) {
- osi_Panic("getdcache");
+ if (!setLocks)
+ avc->f.states |= CDCLock;
+ /* just need slots */
+ afs_GetDownD(5, (int *)0, afs_DCGetBucket(avc));
+ if (!setLocks)
+ avc->f.states &= ~CDCLock;
+ }
+ code = afs_AllocDCache(&tdc, avc, chunk, aflags, NULL);
+ if (code) {
+ ReleaseWriteLock(&afs_xdcache);
+ if (code == ENOSPC) {
+ /* It looks like afs_AllocDCache failed because we don't
+ * have any free dslots to use. Maybe if we wait a little
+ * while, we'll be able to free up some slots, so try for 5
+ * minutes, then bail out. */
+ if (++downDCount > 300) {
+ afs_warn("afs: Unable to get free cache space for file "
+ "%u:%u.%u.%u for 5 minutes; failing with an i/o error\n",
+ avc->f.fid.Cell,
+ avc->f.fid.Fid.Volume,
+ avc->f.fid.Fid.Vnode,
+ avc->f.fid.Fid.Unique);
+ goto done;
}
- ReleaseWriteLock(&afs_xdcache);
- /*
- * Locks held:
- * avc->lock(R) if setLocks
- * avc->lock(W) if !setLocks
- */
- afs_osi_Wait(1000, 0, 0);
- goto RetryLookup;
- }
+ afs_osi_Wait(1000, 0, 0);
+ goto RetryLookup;
+ }
+
+ /* afs_AllocDCache failed, but not because we're out of free
+ * dslots. Something must be screwy with the cache, so bail out
+ * immediately without waiting. */
+ afs_warn("afs: Error while alloc'ing cache slot for file "
+ "%u:%u.%u.%u; failing with an i/o error\n",
+ avc->f.fid.Cell,
+ avc->f.fid.Fid.Volume,
+ avc->f.fid.Fid.Vnode,
+ avc->f.fid.Fid.Unique);
+ goto done;
}
- tdc = afs_AllocDCache(avc, chunk, aflags, NULL);
+ /*
+ * Locks held:
+ * avc->lock(R) if setLocks
+ * avc->lock(W) if !setLocks
+ * tdc->lock(W)
+ * afs_xdcache(W)
+ */
/*
* Now add to the two hash chains - note that i is still set
/* no data in file to read at this position */
UpgradeSToWLock(&tdc->lock, 607);
file = afs_CFileOpen(&tdc->f.inode);
+ osi_Assert(file);
afs_CFileTruncate(file, 0);
afs_CFileClose(file);
afs_AdjustSize(tdc, 0);
goto RetryGetDCache;
}
- /* Do not fetch data beyond truncPos. */
- maxGoodLength = avc->f.m.Length;
- if (avc->f.truncPos < maxGoodLength)
- maxGoodLength = avc->f.truncPos;
Position = AFS_CHUNKBASE(abyte);
if (vType(avc) == VDIR) {
size = avc->f.m.Length;
}
size = 999999999; /* max size for transfer */
} else {
+ afs_size_t maxGoodLength;
+
+ /* estimate how much data we're expecting back from the server,
+ * and reserve space in the dcache entry for it */
+
+ maxGoodLength = avc->f.m.Length;
+ if (avc->f.truncPos < maxGoodLength)
+ maxGoodLength = avc->f.truncPos;
+
size = AFS_CHUNKSIZE(abyte); /* expected max size */
- /* don't read past end of good data on server */
- if (Position + size > maxGoodLength)
+ if (Position > maxGoodLength) { /* If we're beyond EOF */
+ size = 0;
+ } else if (Position + size > maxGoodLength) {
size = maxGoodLength - Position;
- if (size < 0)
- size = 0; /* Handle random races */
+ }
+ osi_Assert(size >= 0);
+
if (size > tdc->f.chunkBytes) {
- /* pre-reserve space for file */
+ /* pre-reserve estimated space for file */
afs_AdjustSize(tdc, size); /* changes chunkBytes */
- /* max size for transfer still in size */
+ }
+
+ if (size) {
+ /* For the actual fetch, do not limit the request to the
+ * length of the file. If this results in a read past EOF on
+ * the server, the server will just reply with less data than
+ * requested. If we limit ourselves to only requesting data up
+ * to the avc file length, we open ourselves up to races if the
+ * file is extended on the server at about the same time.
+ *
+ * However, we must restrict ourselves to the avc->f.truncPos
+ * length, since this represents an outstanding local
+ * truncation of the file that will be committed to the
+ * fileserver when we actually write the fileserver contents.
+ * If we do not restrict the fetch length based on
+ * avc->f.truncPos, a different truncate operation extending
+ * the file length could cause the old data after
+ * avc->f.truncPos to reappear, instead of extending the file
+ * with NUL bytes. */
+ size = AFS_CHUNKSIZE(abyte);
+ if (Position > avc->f.truncPos) {
+ size = 0;
+ } else if (Position + size > avc->f.truncPos) {
+ size = avc->f.truncPos - Position;
+ }
+ osi_Assert(size >= 0);
}
}
if (afs_mariner && !tdc->f.chunk)
*/
DZap(tdc); /* pages in cache may be old */
file = afs_CFileOpen(&tdc->f.inode);
+ if (!file) {
+ /* We can't access the file in the disk cache backing this dcache;
+ * bail out. */
+ ReleaseWriteLock(&tdc->lock);
+ afs_PutDCache(tdc);
+ tdc = NULL;
+ goto done;
+ }
afs_RemoveVCB(&avc->f.fid);
tdc->f.states |= DWriting;
tdc->dflags |= DFFetching;
ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32,
tdc->dflags);
}
- tsmall =
- (struct afs_FetchOutput *)osi_AllocLargeSpace(sizeof(struct afs_FetchOutput));
+ tsmall = osi_AllocLargeSpace(sizeof(struct afs_FetchOutput));
setVcacheStatus = 0;
#ifndef AFS_NOSTATS
/*
afs_CFileTruncate(file, size); /* prune it */
} else {
if (!setLocks || slowPass) {
- ObtainWriteLock(&afs_xcbhash, 453);
- afs_DequeueCallback(avc);
- avc->f.states &= ~(CStatd | CUnique);
- avc->callback = NULL;
- ReleaseWriteLock(&afs_xcbhash);
- if (avc->f.fid.Fid.Vnode & 1 || (vType(avc) == VDIR))
- osi_dnlc_purgedp(avc);
+ afs_StaleVCacheFlags(avc, AFS_STALEVC_CLEARCB, CUnique);
} else {
/* Something lost. Forget about performance, and go
* back with a vcache write lock.
ReleaseWriteLock(&tdc->lock);
afs_PutDCache(tdc);
tdc = 0;
+
+ /*
+ * Call afs_Analyze to manage the connection references
+ * and handle the error code (possibly mark servers
+ * down, etc). We are going to retry getting the
+ * dcache regardless, so we just ignore the retry hint
+ * returned by afs_Analyze on this call.
+ */
+ (void)afs_Analyze(tc, rxconn, code, &avc->f.fid, areq,
+ AFS_STATS_FS_RPCIDX_FETCHDATA, SHARED_LOCK, NULL);
+
ReleaseReadLock(&avc->lock);
+
slowPass = 1;
goto RetryGetDCache;
}
ReleaseWriteLock(&tdc->lock);
afs_PutDCache(tdc);
if (!afs_IsDynroot(avc)) {
- ObtainWriteLock(&afs_xcbhash, 454);
- afs_DequeueCallback(avc);
- avc->f.states &= ~(CStatd | CUnique);
- ReleaseWriteLock(&afs_xcbhash);
- if (avc->f.fid.Fid.Vnode & 1 || (vType(avc) == VDIR))
- osi_dnlc_purgedp(avc);
+ afs_StaleVCacheFlags(avc, 0, CUnique);
/*
* Locks held:
* avc->lock(W); assert(!setLocks || slowPass)
* Environment:
* The afs_xdcache is write-locked through this whole affair.
*/
-void
+int
afs_WriteThroughDSlots(void)
{
struct dcache *tdc;
afs_int32 i, touchedit = 0;
+ int code = 0;
struct afs_q DirtyQ, *tq;
#define DQTODC(q) ((struct dcache *)(((char *) (q)) - sizeof(struct afs_q)))
- for (tq = DirtyQ.prev; tq != &DirtyQ; tq = QPrev(tq)) {
+ for (tq = DirtyQ.prev; tq != &DirtyQ && code == 0; tq = QPrev(tq)) {
tdc = DQTODC(tq);
if (tdc->dflags & DFEntryMod) {
int wrLock;
if (wrLock && (tdc->dflags & DFEntryMod)) {
tdc->dflags &= ~DFEntryMod;
ObtainWriteLock(&afs_xdcache, 620);
- osi_Assert(afs_WriteDCache(tdc, 1) == 0);
+ code = afs_WriteDCache(tdc, 1);
ReleaseWriteLock(&afs_xdcache);
- touchedit = 1;
+ if (code) {
+ /* We didn't successfully write out the dslot; make sure we
+ * try again later */
+ tdc->dflags |= DFEntryMod;
+ } else {
+ touchedit = 1;
+ }
}
if (wrLock)
ReleaseWriteLock(&tdc->lock);
afs_PutDCache(tdc);
}
+ if (code) {
+ return code;
+ }
+
ObtainWriteLock(&afs_xdcache, 617);
if (!touchedit && (cacheDiskType != AFS_FCACHE_TYPE_MEM)) {
/* Touch the file to make sure that the mtime on the file is kept
*/
struct afs_fheader theader;
- theader.magic = AFS_FHMAGIC;
- theader.firstCSize = AFS_FIRSTCSIZE;
- theader.otherCSize = AFS_OTHERCSIZE;
- theader.version = AFS_CI_VERSION;
- theader.dataSize = sizeof(struct fcache);
+ afs_InitFHeader(&theader);
afs_osi_Write(afs_cacheInodep, 0, &theader, sizeof(theader));
}
ReleaseWriteLock(&afs_xdcache);
+ return 0;
}
/*
*
* Parameters:
* aslot : Dcache slot to look at.
+ * type : What 'type' of dslot to get; see the dslot_state enum
*
* Environment:
* Must be called with afs_xdcache write-locked.
*/
struct dcache *
-afs_MemGetDSlot(afs_int32 aslot, int needvalid)
+afs_MemGetDSlot(afs_int32 aslot, dslot_state type)
{
struct dcache *tdc;
int existing = 0;
return tdc;
}
- osi_Assert(!needvalid);
+ /* if we got here, the given slot is not in memory in our list of known
+ * slots. for memcache, the only place a dslot can exist is in memory, so
+ * if the caller is expecting to get back a known dslot, and we've reached
+ * here, something is very wrong. DSLOT_NEW is the only type of dslot that
+ * may not exist; for all others, the caller assumes the given dslot
+ * already exists. so, 'type' had better be DSLOT_NEW here, or something is
+ * very wrong. */
+ osi_Assert(type == DSLOT_NEW);
if (!afs_freeDSList)
afs_GetDownDSlot(4);
*
* Parameters:
* aslot : Dcache slot to look at.
+ * type : What 'type' of dslot to get; see the dslot_state enum
*
* Environment:
* afs_xdcache lock write-locked.
*/
struct dcache *
-afs_UFSGetDSlot(afs_int32 aslot, int needvalid)
+afs_UFSGetDSlot(afs_int32 aslot, dslot_state type)
{
afs_int32 code;
struct dcache *tdc;
entryok = 0;
#if defined(KERNEL_HAVE_UERROR)
last_error = getuerror();
+#else
+ last_error = code;
#endif
lasterrtime = osi_Time();
- if (needvalid) {
+ if (type != DSLOT_NEW) {
+ /* If we are requesting a non-DSLOT_NEW slot, this is an error.
+ * non-DSLOT_NEW slots are supposed to already exist, so if we
+ * failed to read in the slot, something is wrong. */
struct osi_stat tstat;
if (afs_osi_Stat(afs_cacheInodep, &tstat)) {
tstat.size = -1;
}
- afs_warn("afs: disk cache read error in CacheItems off %d/%d "
- "code %d/%d\n",
+ afs_warn("afs: disk cache read error in CacheItems slot %d "
+ "off %d/%d code %d/%d\n",
+ (int)aslot,
off, (int)tstat.size,
(int)code, (int)sizeof(struct fcache));
/* put tdc back on the free dslot list */
}
if (!afs_CellNumValid(tdc->f.fid.Cell)) {
entryok = 0;
- if (needvalid) {
+ if (type == DSLOT_VALID) {
osi_Panic("afs: needed valid dcache but index %d off %d has "
"invalid cell num %d\n",
(int)aslot, off, (int)tdc->f.fid.Cell);
}
}
- if (needvalid && tdc->f.fid.Fid.Volume == 0) {
+ if (type == DSLOT_VALID && tdc->f.fid.Fid.Volume == 0) {
osi_Panic("afs: invalid zero-volume dcache entry at slot %d off %d",
(int)aslot, off);
}
+ if (type == DSLOT_UNUSED) {
+ /* the requested dslot is known to exist, but contain invalid data
+ * (this happens when we're using a dslot from the free or discard
+ * list). be sure not to re-use the data in it, so force invalidation.
+ */
+ entryok = 0;
+ }
+
if (!entryok) {
tdc->f.fid.Cell = 0;
tdc->f.fid.Fid.Volume = 0;
tdc->f.states &= ~(DRO|DBackup|DRW);
afs_DCMoveBucket(tdc, 0, 0);
} else {
- if (&tdc->f != 0) {
- if (tdc->f.states & DRO) {
- afs_DCMoveBucket(tdc, 0, 2);
- } else if (tdc->f.states & DBackup) {
- afs_DCMoveBucket(tdc, 0, 1);
- } else {
- afs_DCMoveBucket(tdc, 0, 1);
- }
+ if (tdc->f.states & DRO) {
+ afs_DCMoveBucket(tdc, 0, 2);
+ } else if (tdc->f.states & DBackup) {
+ afs_DCMoveBucket(tdc, 0, 1);
+ } else {
+ afs_DCMoveBucket(tdc, 0, 1);
}
}
tdc->refCount = 1;
* is already being handled by the higher-level code.
*/
if ((avc->f.states & CSafeStore) == 0) {
- tb->code = 0;
+ tb->code_raw = tb->code_checkcode = 0;
tb->flags |= BUVALID;
if (tb->flags & BUWAIT) {
tb->flags &= ~BUWAIT;
return 0;
}
-
/*!
* Given a file name and inode, set up that file to be an
* active member in the AFS cache. This also involves checking
ObtainWriteLock(&tdc->lock, 621);
ObtainWriteLock(&afs_xdcache, 622);
- if (afile) {
- code = afs_LookupInodeByPath(afile, &tdc->f.inode.ufs, NULL);
- if (code) {
- ReleaseWriteLock(&afs_xdcache);
- ReleaseWriteLock(&tdc->lock);
- afs_PutDCache(tdc);
- return code;
- }
+ if (!afile && !ainode) {
+ tfile = NULL;
+ fileIsBad = 1;
} else {
- /* Add any other 'complex' inode types here ... */
+ if (afile) {
+ code = afs_LookupInodeByPath(afile, &tdc->f.inode.ufs, NULL);
+ if (code) {
+ ReleaseWriteLock(&afs_xdcache);
+ ReleaseWriteLock(&tdc->lock);
+ afs_PutDCache(tdc);
+ return code;
+ }
+ } else {
+ /* Add any other 'complex' inode types here ... */
#if !defined(AFS_LINUX26_ENV) && !defined(AFS_CACHE_VNODE_PATH)
- tdc->f.inode.ufs = ainode;
+ tdc->f.inode.ufs = ainode;
#else
- osi_Panic("Can't init cache with inode numbers when complex inodes are "
- "in use\n");
+ osi_Panic("Can't init cache with inode numbers when complex inodes are "
+ "in use\n");
#endif
- }
- fileIsBad = 0;
- if ((tdc->f.states & DWriting) || tdc->f.fid.Fid.Volume == 0)
- fileIsBad = 1;
- tfile = osi_UFSOpen(&tdc->f.inode);
- code = afs_osi_Stat(tfile, &tstat);
- if (code)
- osi_Panic("initcachefile stat");
+ }
+ fileIsBad = 0;
+ if ((tdc->f.states & DWriting) || tdc->f.fid.Fid.Volume == 0)
+ fileIsBad = 1;
+ tfile = osi_UFSOpen(&tdc->f.inode);
+ if (!tfile) {
+ ReleaseWriteLock(&afs_xdcache);
+ ReleaseWriteLock(&tdc->lock);
+ afs_PutDCache(tdc);
+ return ENOENT;
+ }
- /*
- * If file size doesn't match the cache info file, it's probably bad.
- */
- if (tdc->f.chunkBytes != tstat.size)
- fileIsBad = 1;
+ code = afs_osi_Stat(tfile, &tstat);
+ if (code)
+ osi_Panic("initcachefile stat");
+
+ /*
+ * If file size doesn't match the cache info file, it's probably bad.
+ */
+ if (tdc->f.chunkBytes != tstat.size)
+ fileIsBad = 1;
+ /*
+ * If file changed within T (120?) seconds of cache info file, it's
+ * probably bad. In addition, if slot changed within last T seconds,
+ * the cache info file may be incorrectly identified, and so slot
+ * may be bad.
+ */
+ if (cacheInfoModTime < tstat.mtime + 120)
+ fileIsBad = 1;
+ if (cacheInfoModTime < tdc->f.modTime + 120)
+ fileIsBad = 1;
+ /* In case write through is behind, make sure cache items entry is
+ * at least as new as the chunk.
+ */
+ if (tdc->f.modTime < tstat.mtime)
+ fileIsBad = 1;
+ }
tdc->f.chunkBytes = 0;
- /*
- * If file changed within T (120?) seconds of cache info file, it's
- * probably bad. In addition, if slot changed within last T seconds,
- * the cache info file may be incorrectly identified, and so slot
- * may be bad.
- */
- if (cacheInfoModTime < tstat.mtime + 120)
- fileIsBad = 1;
- if (cacheInfoModTime < tdc->f.modTime + 120)
- fileIsBad = 1;
- /* In case write through is behind, make sure cache items entry is
- * at least as new as the chunk.
- */
- if (tdc->f.modTime < tstat.mtime)
- fileIsBad = 1;
if (fileIsBad) {
tdc->f.fid.Fid.Volume = 0; /* not in the hash table */
- if (tstat.size != 0)
+ if (tfile && tstat.size != 0)
osi_UFSTruncate(tfile, 0);
tdc->f.states &= ~(DRO|DBackup|DRW);
afs_DCMoveBucket(tdc, 0, 0);
afs_indexUnique[index] = tdc->f.fid.Fid.Unique;
} /*File is not bad */
- osi_UFSClose(tfile);
+ if (tfile)
+ osi_UFSClose(tfile);
tdc->f.states &= ~DWriting;
tdc->dflags &= ~DFEntryMod;
/* don't set f.modTime; we're just cleaning up */
* \param aflags
*
*/
-void
+int
afs_dcacheInit(int afiles, int ablocks, int aDentries, int achunk, int aflags)
{
struct dcache *tdp;
int i;
int code;
+ int afs_dhashbits;
afs_freeDCList = NULLIDX;
afs_discardDCList = NULLIDX;
if (!aDentries)
aDentries = DDSIZE;
- if (aflags & AFSCALL_INIT_MEMCACHE) {
- /*
- * Use a memory cache instead of a disk cache
- */
- cacheDiskType = AFS_FCACHE_TYPE_MEM;
- afs_cacheType = &afs_MemCacheOps;
- afiles = (afiles < aDentries) ? afiles : aDentries; /* min */
- ablocks = afiles * (AFS_FIRSTCSIZE / 1024);
- /* ablocks is reported in 1K blocks */
- code = afs_InitMemCache(afiles, AFS_FIRSTCSIZE, aflags);
- if (code != 0) {
- afs_warn("afsd: memory cache too large for available memory.\n");
- afs_warn("afsd: AFS files cannot be accessed.\n\n");
- dcacheDisabled = 1;
- afiles = ablocks = 0;
- } else
- afs_warn("Memory cache: Allocating %d dcache entries...",
- aDentries);
- } else {
- cacheDiskType = AFS_FCACHE_TYPE_UFS;
- afs_cacheType = &afs_UfsCacheOps;
- }
-
+ /* afs_dhashsize defaults to 1024 */
if (aDentries > 512)
afs_dhashsize = 2048;
+ /* Try to keep the average chain length around two unless the table
+ * would be ridiculously big. */
+ if (aDentries > 4096) {
+ afs_dhashbits = opr_fls(aDentries) - 3;
+ /* Cap the hash tables to 32k entries. */
+ if (afs_dhashbits > 15)
+ afs_dhashbits = 15;
+ afs_dhashsize = opr_jhash_size(afs_dhashbits);
+ }
/* initialize hash tables */
afs_dvhashTbl = afs_osi_Alloc(afs_dhashsize * sizeof(afs_int32));
osi_Assert(afs_dvhashTbl != NULL);
afs_stats_cmperf.cacheBucket2_Discarded = 0;
afs_DCSizeInit();
QInit(&afs_DLRU);
+
+ if (aflags & AFSCALL_INIT_MEMCACHE) {
+ /*
+ * Use a memory cache instead of a disk cache
+ */
+ cacheDiskType = AFS_FCACHE_TYPE_MEM;
+ afs_cacheType = &afs_MemCacheOps;
+ afiles = (afiles < aDentries) ? afiles : aDentries; /* min */
+ ablocks = afiles * (AFS_FIRSTCSIZE / 1024);
+ /* ablocks is reported in 1K blocks */
+ code = afs_InitMemCache(afiles, AFS_FIRSTCSIZE, aflags);
+ if (code != 0) {
+ afs_warn("afsd: memory cache too large for available memory.\n");
+ afs_warn("afsd: AFS files cannot be accessed.\n\n");
+ dcacheDisabled = 1;
+ return code;
+ } else
+ afs_warn("Memory cache: Allocating %d dcache entries...",
+ aDentries);
+ } else {
+ cacheDiskType = AFS_FCACHE_TYPE_UFS;
+ afs_cacheType = &afs_UfsCacheOps;
+ }
+ return 0;
}
/*!
ObtainWriteLock(&afs_xdcache, 716);
/* Get a fresh dcache. */
- new_dc = afs_AllocDCache(avc, 0, 0, &shadow_fid);
+ (void)afs_AllocDCache(&new_dc, avc, 0, 0, &shadow_fid);
+ osi_Assert(new_dc);
ObtainReadLock(&adc->mflock);
/* Open the files. */
tfile_src = afs_CFileOpen(&adc->f.inode);
tfile_dst = afs_CFileOpen(&new_dc->f.inode);
+ osi_Assert(tfile_src);
+ osi_Assert(tfile_dst);
/* And now copy dir dcache data into this dcache,
* 4k at a time.