/**
* Get a dcache entry from the discard or free list
*
+ * @param[out] adc On success, a dcache from the given list. Otherwise, NULL.
* @param[in] indexp A pointer to the head of the dcache free list or discard
* list (afs_freeDCList, or afs_discardDCList)
*
- * @return A dcache from that list, or NULL if none could be retrieved.
+ * @return 0 on success. If there are no dcache slots available, return ENOSPC.
+ * If we encountered an error in disk i/o while trying to find a
+ * dcache, return EIO.
+ *
+ * @note Only the slot at the head of the list (*indexp) is tried. If
+ * afs_GetUnusedDSlot() returns NULL for it, EIO is returned
+ * immediately; this function does not modify *indexp in that case
+ * and no further list entries are examined.
*
* @pre afs_xdcache is write-locked
*/
-static struct dcache *
-afs_GetDSlotFromList(afs_int32 *indexp)
+static int
+afs_GetDSlotFromList(struct dcache **adc, afs_int32 *indexp)
{
struct dcache *tdc;
- for ( ; *indexp != NULLIDX; indexp = &afs_dvnextTbl[*indexp]) {
- tdc = afs_GetUnusedDSlot(*indexp);
- if (tdc) {
- osi_Assert(tdc->refCount == 1);
- ReleaseReadLock(&tdc->tlock);
- *indexp = afs_dvnextTbl[tdc->index];
- afs_dvnextTbl[tdc->index] = NULLIDX;
- return tdc;
- }
+ *adc = NULL; /* always set, so callers may test *adc instead of the return code */
+
+ if (*indexp == NULLIDX) {
+ return ENOSPC; /* the list is empty */
}
- return NULL;
+
+ tdc = afs_GetUnusedDSlot(*indexp);
+ if (tdc == NULL) {
+ return EIO; /* could not load the head slot; do not try others */
+ }
+
+ osi_Assert(tdc->refCount == 1);
+ ReleaseReadLock(&tdc->tlock);
+ *indexp = afs_dvnextTbl[tdc->index]; /* head now points at the entry that followed tdc */
+ afs_dvnextTbl[tdc->index] = NULLIDX; /* tdc no longer links to a next entry */
+
+ *adc = tdc;
+ return 0;
}
/*!
/*
* Get an entry from the list of discarded cache elements
*/
- tdc = afs_GetDSlotFromList(&afs_discardDCList);
+ (void)afs_GetDSlotFromList(&tdc, &afs_discardDCList);
if (!tdc) {
ReleaseWriteLock(&afs_xdcache);
return -1;
if (tdc->refCount == 0) {
if ((ix = tdc->index) == NULLIDX)
osi_Panic("getdowndslot");
- /* pull the entry out of the lruq and put it on the free list */
- QRemove(&tdc->lruq);
/* write-through if modified */
if (tdc->dflags & DFEntryMod) {
AFS_GLOCK();
}
#else
+ int code;
+
+ code = afs_WriteDCache(tdc, 1);
+ if (code) {
+ /*
+ * We couldn't flush it at this time; return early because
+ * if afs_WriteDCache() failed once it is likely to
+ * continue failing for subsequent dcaches.
+ */
+ return;
+ }
tdc->dflags &= ~DFEntryMod;
- osi_Assert(afs_WriteDCache(tdc, 1) == 0);
#endif
}
- /* finally put the entry in the free list */
+ /* pull the entry out of the lruq and put it on the free list */
+ QRemove(&tdc->lruq);
afs_indexTable[ix] = NULL;
afs_indexFlags[ix] &= ~IFEverUsed;
tdc->index = NULLIDX;
tdc = afs_GetValidDSlot(index);
if (!tdc) {
/* afs_TryToSmush is best-effort; we may not actually discard
- * everything, so failure to discard a dcache due to an i/o
+ * everything, so failure to discard dcaches due to an i/o
* error is okay. */
- continue;
+ break;
}
if (!FidCmp(&tdc->f.fid, &avc->f.fid)) {
if (sync) {
i = afs_dvnextTbl[index];
if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
tdc = afs_GetValidDSlot(index);
- if (tdc) {
- if (!FidCmp(&tdc->f.fid, &avc->f.fid)) {
- totalChunks--;
- }
- ReleaseReadLock(&tdc->tlock);
- afs_PutDCache(tdc);
- }
+ if (!tdc) {
+ break;
+ }
+ if (!FidCmp(&tdc->f.fid, &avc->f.fid)) {
+ totalChunks--;
+ }
+ ReleaseReadLock(&tdc->tlock);
+ afs_PutDCache(tdc);
}
}
ReleaseWriteLock(&afs_xdcache);
/* afs_FindDCache is best-effort; we may not find the given
* file/offset, so if we cannot find the given dcache due to
* i/o errors, that is okay. */
- continue;
+ index = NULLIDX;
+ break;
}
ReleaseReadLock(&tdc->tlock);
if (!FidCmp(&tdc->f.fid, &avc->f.fid) && chunk == tdc->f.chunk) {
} /*afs_FindDCache */
/* only call these from afs_AllocDCache() */
-static struct dcache *
-afs_AllocFreeDSlot(void)
+static int /* 0 on success, else the error code from afs_GetDSlotFromList() */
+afs_AllocFreeDSlot(struct dcache **adc)
{
+ int code;
struct dcache *tdc;
- tdc = afs_GetDSlotFromList(&afs_freeDCList);
- if (!tdc) {
- return NULL;
+ code = afs_GetDSlotFromList(&tdc, &afs_freeDCList);
+ if (code) {
+ return code; /* *adc is left untouched on failure */
}
afs_indexFlags[tdc->index] &= ~IFFree;
ObtainWriteLock(&tdc->lock, 604);
afs_freeDCCount--;
- return tdc;
+ *adc = tdc; /* handed back with tdc->lock write-locked (see above) */
+ return 0;
}
-static struct dcache *
-afs_AllocDiscardDSlot(afs_int32 lock)
+static int /* 0 on success, else the error code from afs_GetDSlotFromList() */
+afs_AllocDiscardDSlot(struct dcache **adc, afs_int32 lock)
{
+ int code;
struct dcache *tdc;
afs_uint32 size = 0;
struct osi_file *file;
- tdc = afs_GetDSlotFromList(&afs_discardDCList);
- if (!tdc) {
- return NULL;
+ code = afs_GetDSlotFromList(&tdc, &afs_discardDCList);
+ if (code) {
+ return code; /* *adc is left untouched on failure */
}
afs_indexFlags[tdc->index] &= ~IFDiscarded;
ObtainWriteLock(&tdc->lock, 605);
afs_AdjustSize(tdc, 0);
}
- return tdc;
+ *adc = tdc; /* handed back with tdc->lock write-locked (see above) */
+ return 0;
}
/*!
* Get a fresh dcache from the free or discarded list.
*
+ * \param adc Set to the new dcache on success, and NULL on error.
* \param avc Who's dcache is this going to be?
* \param chunk The position where it will be placed in.
* \param lock How are locks held.
* - avc (R if (lock & 1) set and W otherwise)
* \note It write locks the new dcache. The caller must unlock it.
*
- * \return The new dcache.
+ * \return If we're out of dslots, ENOSPC. If we encountered disk errors, EIO.
+ * On success, return 0.
+ * The second-choice list is tried only when the first choice
+ * fails with ENOSPC; any other error from the first choice is
+ * returned as-is.
*/
-struct dcache *
-afs_AllocDCache(struct vcache *avc, afs_int32 chunk, afs_int32 lock,
- struct VenusFid *ashFid)
+static int
+afs_AllocDCache(struct dcache **adc, struct vcache *avc, afs_int32 chunk,
+ afs_int32 lock, struct VenusFid *ashFid)
{
+ int code;
struct dcache *tdc = NULL;
+ *adc = NULL;
+
/* if (lock & 2), prefer 'free' dcaches; otherwise, prefer 'discard'
- * dcaches. In either case, try both if our first choice doesn't work. */
+ * dcaches. In either case, try both if our first choice doesn't work due
+ * to ENOSPC. */
if ((lock & 2)) {
- tdc = afs_AllocFreeDSlot();
- if (!tdc) {
- tdc = afs_AllocDiscardDSlot(lock);
+ code = afs_AllocFreeDSlot(&tdc);
+ if (code == ENOSPC) {
+ code = afs_AllocDiscardDSlot(&tdc, lock);
}
} else {
- tdc = afs_AllocDiscardDSlot(lock);
- if (!tdc) {
- tdc = afs_AllocFreeDSlot();
+ code = afs_AllocDiscardDSlot(&tdc, lock);
+ if (code == ENOSPC) {
+ code = afs_AllocFreeDSlot(&tdc);
}
}
- if (!tdc) {
- return NULL;
+ if (code) {
+ return code;
}
/*
if (tdc->lruq.prev == &tdc->lruq)
osi_Panic("lruq 1");
- return tdc;
+ *adc = tdc;
+ return 0;
+}
+/*!
+ * Sanity-check the number of bytes in a dcache chunk against a file length.
+ *
+ * The expected size of the chunk is derived from file_length and the start
+ * offset of chunk adc->f.chunk, capped at that chunk's size. The check is
+ * skipped (and 1 is returned) for directories, for vcaches with CDirty set,
+ * and for DRW dcaches when from_net is zero.
+ *
+ * \param adc The dcache to check; its chunk number, states and fid are used
+ * \param avc The vcache for adc; its type and states decide whether
+ * the check applies
+ * \param chunk_bytes The number of bytes the chunk holds
+ * \param file_length The file length to check chunk_bytes against
+ * \param versionNo A data version; only used in the warning message
+ * \param from_net Nonzero if the chunk data is being populated from the
+ * net rather than taken from the local cache
+ *
+ * \return 1 if the size is acceptable (or was not checked). 0 if chunk_bytes
+ * differs from the expected size; in that case a warning is logged,
+ * but only if more than an hour has passed since the last one.
+ */
+static int
+IsDCacheSizeOK(struct dcache *adc, struct vcache *avc, afs_int32 chunk_bytes,
+ afs_size_t file_length, afs_uint32 versionNo, int from_net)
+{
+ afs_size_t expected_bytes;
+ afs_size_t chunk_start = AFS_CHUNKTOBASE(adc->f.chunk);
+
+ if (vType(avc) == VDIR) {
+ /*
+ * Directory blobs may be constructed locally (see afs_LocalHero), and
+ * the size of the blob may differ slightly compared to what's on the
+ * fileserver. So, skip size checks for directories.
+ */
+ return 1;
+ }
+
+ if ((avc->f.states & CDirty)) {
+ /*
+ * Our vcache may have writes that are local to our cache, but not yet
+ * written to the fileserver. In such a situation, we may have dcaches
+ * for that file that are "short". For example:
+ *
+ * Say we have a file that is 0 bytes long. A process opens that file,
+ * and writes some data to offset 5M (keeping the file open). Another
+ * process comes along and reads data from offset 1M. We'll try to
+ * fetch data at offset 1M, and the fileserver will respond with 0
+ * bytes, since our locally-written data hasn't been written to the
+ * fileserver yet (on the fileserver, the file is still 0-bytes long).
+ * So our dcache at offset 1M will have 0 bytes.
+ *
+ * So if CDirty is set, don't do any size/length checks at all, since
+ * we have no idea if the avc length is valid.
+ */
+ return 1;
+ }
+
+ if (!from_net && (adc->f.states & DRW)) {
+ /*
+ * The dcache data we're looking at is from our local cache (not from a
+ * fileserver), and it's for data in an RW volume. For cached RW data,
+ * there are some edge cases that can cause the below length checks to
+ * trigger false positives.
+ *
+ * For example: if the local client writes 4 bytes to a new file at
+ * offset 0, and then 4 bytes at offset 0x400000, the file will be
+ * 0x400004 bytes long, but the first dcache chunk will only contain 4
+ * bytes. If such a file is fetched from a fileserver, the first chunk
+ * will have a full chunk of data (most of it zeroes), but on the
+ * client that did the write, the sparse data will not appear in the
+ * dcache.
+ *
+ * Such false positives should only be possible with RW data, since
+ * non-RW data is never generated locally. So to avoid the false
+ * positives, assume the dcache length is OK for RW data if the dcache
+ * came from our local cache (and not directly from a fileserver).
+ */
+ return 1;
+ }
+
+ if (file_length < chunk_start) {
+ expected_bytes = 0;
+
+ } else {
+ expected_bytes = file_length - chunk_start;
+
+ if (vType(avc) != VDIR && expected_bytes > AFS_CHUNKTOSIZE(adc->f.chunk)) {
+ /* A non-dir chunk cannot have more bytes than the chunksize.
+ * (Directories already returned 1 at the top of this function,
+ * so the vType() test in this condition is always true here.) */
+ expected_bytes = AFS_CHUNKTOSIZE(adc->f.chunk);
+ }
+ }
+
+ if (chunk_bytes != expected_bytes) {
+ static const afs_uint32 one_hour = 60 * 60;
+ static afs_uint32 last_warn;
+ afs_uint32 now = osi_Time();
+
+ if (now < last_warn) {
+ /* clock went backwards; reset last_warn so the unsigned
+ * subtraction below does not wrap around */
+ last_warn = now;
+ }
+
+ if (now - last_warn > one_hour) {
+ unsigned int mtime = adc->f.modTime;
+
+ last_warn = now;
+
+ if (from_net) {
+ /*
+ * The dcache we're looking at didn't come from the cache, but is
+ * being populated from the net. Don't print out its mtime in that
+ * case; that would be misleading since that's the mtime from the
+ * last time this dcache slot was written to.
+ */
+ mtime = 0;
+ }
+
+ afs_warn("afs: Detected corrupt dcache for file %d.%u.%u.%u: chunk %d "
+ "(offset %lu) has %d bytes, but it should have %lu bytes\n",
+ adc->f.fid.Cell,
+ adc->f.fid.Fid.Volume,
+ adc->f.fid.Fid.Vnode,
+ adc->f.fid.Fid.Unique,
+ adc->f.chunk,
+ (unsigned long)chunk_start,
+ chunk_bytes,
+ (unsigned long)expected_bytes);
+ afs_warn("afs: (dcache %p, file length %lu, DV %u, dcache mtime %u, "
+ "index %d, dflags 0x%x, mflags 0x%x, states 0x%x, vcache "
+ "states 0x%x)\n",
+ adc,
+ (unsigned long)file_length,
+ versionNo,
+ mtime,
+ adc->index,
+ (unsigned)adc->dflags,
+ (unsigned)adc->mflags,
+ (unsigned)adc->f.states,
+ avc->f.states);
+ afs_warn("afs: Ignoring the dcache for now, but this may indicate "
+ "corruption in the AFS cache, or a bug.\n");
+ }
+ return 0; /* mismatch is reported even when the warning was rate-limited */
+ }
+ return 1;
+}
+
+/*!
+ * Check if a dcache is "fresh". That is, if the dcache's DV matches the DV of
+ * the vcache for that file, and the dcache looks "sane" (its length makes
+ * sense, when considering the length of the given avc).
+ *
+ * \param adc The dcache to check
+ * \param avc The vcache for adc
+ *
+ * \return 1 if the dcache is "fresh". 0 otherwise.
+ *
+ * \note The length check is done by IsDCacheSizeOK(), comparing
+ * adc->f.chunkBytes against avc->f.m.Length with from_net set to 0
+ * (i.e. adc is treated as data from the local cache). It is only
+ * reached when the DVs already match.
+ */
+int
+afs_IsDCacheFresh(struct dcache *adc, struct vcache *avc)
+{
+ if (!hsame(adc->f.versionNo, avc->f.m.DataVersion)) {
+ return 0;
+ }
+
+ /*
+ * If we've reached here, the DV in adc matches the DV of our avc. Check if
+ * the number of bytes in adc agrees with the avc file length, as a sanity
+ * check. If they don't match, we'll pretend the DVs don't match, so the
+ * bad dcache data will not be used, and we'll probably re-fetch the chunk
+ * data, replacing the bad chunk.
+ */
+
+ if (!IsDCacheSizeOK(adc, avc, adc->f.chunkBytes, avc->f.m.Length,
+ hgetlo(adc->f.versionNo), 0)) {
+ return 0;
+ }
+
+ return 1;
}
/*
updateV2DC(int lockVc, struct vcache *v, struct dcache *d, int src)
{
if (!lockVc || 0 == NBObtainWriteLock(&v->lock, src)) {
- if (hsame(v->f.m.DataVersion, d->f.versionNo) && v->callback)
+ if (afs_IsDCacheFresh(d, v) && v->callback)
v->dchint = d;
if (lockVc)
ReleaseWriteLock(&v->lock);
ReleaseReadLock(&afs_xdcache);
shortcut = 1;
- if (hsame(tdc->f.versionNo, avc->f.m.DataVersion)
+ if (afs_IsDCacheFresh(tdc, avc)
&& !(tdc->dflags & DFFetching)) {
afs_stats_cmperf.dcacheHits++;
if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
tdc = afs_GetValidDSlot(index);
if (!tdc) {
- /* we got an i/o error when trying to get the given dslot,
- * but do not bail out just yet; it is possible the dcache
- * we're looking for is elsewhere, so it doesn't matter if
- * we can't load this one. */
+ /* We got an i/o error when trying to get the given dslot.
+ * It's possible the dslot we're looking for is elsewhere,
+ * but most likely the disk cache is currently unusable and
+ * all afs_GetValidDSlot calls will fail, so just bail out. */
dslot_error = 1;
- continue;
+ index = NULLIDX;
+ break;
}
ReleaseReadLock(&tdc->tlock);
/*
if (!setLocks)
avc->f.states &= ~CDCLock;
}
- tdc = afs_AllocDCache(avc, chunk, aflags, NULL);
- if (!tdc) {
- /* If we can't get space for 5 mins we give up and panic */
- if (++downDCount > 300)
- osi_Panic("getdcache");
+ code = afs_AllocDCache(&tdc, avc, chunk, aflags, NULL);
+ if (code) {
ReleaseWriteLock(&afs_xdcache);
- /*
- * Locks held:
- * avc->lock(R) if setLocks
- * avc->lock(W) if !setLocks
- */
- afs_osi_Wait(1000, 0, 0);
- goto RetryLookup;
+ if (code == ENOSPC) {
+ /* It looks like afs_AllocDCache failed because we don't
+ * have any free dslots to use. Maybe if we wait a little
+ * while, we'll be able to free up some slots, so try for 5
+ * minutes, then bail out. */
+ if (++downDCount > 300) {
+ afs_warn("afs: Unable to get free cache space for file "
+ "%u:%u.%u.%u for 5 minutes; failing with an i/o error\n",
+ avc->f.fid.Cell,
+ avc->f.fid.Fid.Volume,
+ avc->f.fid.Fid.Vnode,
+ avc->f.fid.Fid.Unique);
+ goto done;
+ }
+ afs_osi_Wait(1000, 0, 0);
+ goto RetryLookup;
+ }
+
+ /* afs_AllocDCache failed, but not because we're out of free
+ * dslots. Something must be screwy with the cache, so bail out
+ * immediately without waiting. */
+ afs_warn("afs: Error while alloc'ing cache slot for file "
+ "%u:%u.%u.%u; failing with an i/o error\n",
+ avc->f.fid.Cell,
+ avc->f.fid.Fid.Volume,
+ avc->f.fid.Fid.Vnode,
+ avc->f.fid.Fid.Unique);
+ goto done;
}
/*
if (AFS_CHUNKTOBASE(chunk) >= avc->f.m.Length &&
#endif
#endif /* defined(AFS_AIX32_ENV) || defined(AFS_SGI_ENV) */
- !hsame(avc->f.m.DataVersion, tdc->f.versionNo))
+ !afs_IsDCacheFresh(tdc, avc))
doReallyAdjustSize = 1;
if (doReallyAdjustSize || overWriteWholeChunk) {
* avc->lock(W) if !setLocks || slowPass
* tdc->lock(S)
*/
- if (!hsame(avc->f.m.DataVersion, tdc->f.versionNo) && !overWriteWholeChunk) {
+ if (!afs_IsDCacheFresh(tdc, avc) && !overWriteWholeChunk) {
/*
* Version number mismatch.
*/
*/
/* Watch for standard race condition around osi_FlushText */
- if (hsame(avc->f.m.DataVersion, tdc->f.versionNo)) {
+ if (afs_IsDCacheFresh(tdc, avc)) {
updateV2DC(setLocks, avc, tdc, 569); /* set hint */
afs_stats_cmperf.dcacheHits++;
ConvertWToSLock(&tdc->lock);
if (size < 0)
size = 0;
afs_CFileTruncate(file, size); /* prune it */
- } else {
+
+ /* Check that the amount of data that we fetched for the
+ * dcache makes sense. */
+ if (!IsDCacheSizeOK(tdc, avc, size,
+ tsmall->OutStatus.Length,
+ tsmall->OutStatus.DataVersion, 1)) {
+ code = EIO;
+ }
+ }
+ if (code) {
if (!setLocks || slowPass) {
afs_StaleVCacheFlags(avc, AFS_STALEVC_CLEARCB, CUnique);
} else {
ReleaseWriteLock(&tdc->lock);
afs_PutDCache(tdc);
tdc = 0;
- ReleaseReadLock(&avc->lock);
- if (tc) {
- /* If we have a connection, we must put it back,
- * since afs_Analyze will not be called here. */
- afs_PutConn(tc, rxconn, SHARED_LOCK);
- }
+ /*
+ * Call afs_Analyze to manage the connection references
+ * and handle the error code (possibly mark servers
+ * down, etc). We are going to retry getting the
+ * dcache regardless, so we just ignore the retry hint
+ * returned by afs_Analyze on this call.
+ */
+ (void)afs_Analyze(tc, rxconn, code, &avc->f.fid, areq,
+ AFS_STATS_FS_RPCIDX_FETCHDATA, SHARED_LOCK, NULL);
+
+ ReleaseReadLock(&avc->lock);
slowPass = 1;
goto RetryGetDCache;
* \param aflags
*
*/
-void
+int
afs_dcacheInit(int afiles, int ablocks, int aDentries, int achunk, int aflags)
{
struct dcache *tdp;
afs_warn("afsd: memory cache too large for available memory.\n");
afs_warn("afsd: AFS files cannot be accessed.\n\n");
dcacheDisabled = 1;
+ return code;
} else
afs_warn("Memory cache: Allocating %d dcache entries...",
aDentries);
cacheDiskType = AFS_FCACHE_TYPE_UFS;
afs_cacheType = &afs_UfsCacheOps;
}
+ return 0;
}
/*!
tdc = afs_FindDCache(avc, filePos);
if (tdc) {
ObtainWriteLock(&tdc->lock, 658);
- if (!hsame(tdc->f.versionNo, avc->f.m.DataVersion)
+ if (!afs_IsDCacheFresh(tdc, avc)
|| (tdc->dflags & DFFetching)) {
ReleaseWriteLock(&tdc->lock);
afs_PutDCache(tdc);
ObtainWriteLock(&afs_xdcache, 716);
/* Get a fresh dcache. */
- new_dc = afs_AllocDCache(avc, 0, 0, &shadow_fid);
+ (void)afs_AllocDCache(&new_dc, avc, 0, 0, &shadow_fid);
osi_Assert(new_dc);
ObtainReadLock(&adc->mflock);