* struct so we need only export one symbol for AIX.
*/
static struct CTD_stats {
- osi_timeval_t CTD_beforeSleep;
- osi_timeval_t CTD_afterSleep;
- osi_timeval_t CTD_sleepTime;
- osi_timeval_t CTD_runTime;
+ osi_timeval32_t CTD_beforeSleep;
+ osi_timeval32_t CTD_afterSleep;
+ osi_timeval32_t CTD_sleepTime;
+ osi_timeval32_t CTD_runTime;
int CTD_nSleeps;
} CTD_stats;
void
afs_CacheTruncateDaemon(void)
{
- osi_timeval_t CTD_tmpTime;
+ osi_timeval32_t CTD_tmpTime;
u_int counter;
u_int cb_lowat;
u_int dc_hiwat =
return 0;
}
+static int
+IsDCacheSizeOK(struct dcache *adc, struct vcache *avc, afs_int32 chunk_bytes,
+ afs_size_t file_length, afs_uint32 versionNo, int from_net)
+{
+ afs_size_t expected_bytes;
+ afs_size_t chunk_start = AFS_CHUNKTOBASE(adc->f.chunk);
+
+ if (vType(avc) == VDIR) {
+ /*
+ * Directory blobs may be constructed locally (see afs_LocalHero), and
+ * the size of the blob may differ slightly compared to what's on the
+ * fileserver. So, skip size checks for directories.
+ */
+ return 1;
+ }
+
+ if ((avc->f.states & CDirty)) {
+ /*
+ * Our vcache may have writes that are local to our cache, but not yet
+ * written to the fileserver. In such a situation, we may have dcaches
+ * for that file that are "short". For example:
+ *
+ * Say we have a file that is 0 bytes long. A process opens that file,
+ * and writes some data to offset 5M (keeping the file open). Another
+ * process comes along and reads data from offset 1M. We'll try to
+ * fetch data at offset 1M, and the fileserver will respond with 0
+ * bytes, since our locally-written data hasn't been written to the
+ * fileserver yet (on the fileserver, the file is still 0-bytes long).
+ * So our dcache at offset 1M will have 0 bytes.
+ *
+ * So if CDirty is set, don't do any size/length checks at all, since
+ * we have no idea if the avc length is valid.
+ */
+ return 1;
+ }
+
+ if (!from_net && (adc->f.states & DRW)) {
+ /*
+ * The dcache data we're looking at is from our local cache (not from a
+ * fileserver), and it's for data in an RW volume. For cached RW data,
+ * there are some edge cases that can cause the below length checks to
+ * trigger false positives.
+ *
+ * For example: if the local client writes 4 bytes to a new file at
+ * offset 0, and then 4 bytes at offset 0x400000, the file will be
+ * 0x400004 bytes long, but the first dcache chunk will only contain 4
+ * bytes. If such a file is fetched from a fileserver, the first chunk
+ * will have a full chunk of data (most of it zeroes), but on the
+ * client that did the write, the sparse data will not appear in the
+ * dcache.
+ *
+ * Such false positives should only be possible with RW data, since
+ * non-RW data is never generated locally. So to avoid the false
+ * positives, assume the dcache length is OK for RW data if the dcache
+ * came from our local cache (and not directly from a fileserver).
+ */
+ return 1;
+ }
+
+ if (file_length < chunk_start) {
+ expected_bytes = 0;
+
+ } else {
+ expected_bytes = file_length - chunk_start;
+
+ if (vType(avc) != VDIR && expected_bytes > AFS_CHUNKTOSIZE(adc->f.chunk)) {
+ /* A non-dir chunk cannot have more bytes than the chunksize. */
+ expected_bytes = AFS_CHUNKTOSIZE(adc->f.chunk);
+ }
+ }
+
+ if (chunk_bytes != expected_bytes) {
+ static const afs_uint32 one_hour = 60 * 60;
+ static afs_uint32 last_warn;
+ afs_uint32 now = osi_Time();
+
+ if (now < last_warn) {
+ /* clock went backwards */
+ last_warn = now;
+ }
+
+ if (now - last_warn > one_hour) {
+ unsigned int mtime = adc->f.modTime;
+
+ last_warn = now;
+
+ if (from_net) {
+ /*
+ * The dcache we're looking at didn't come from the cache, but is
+ * being populated from the net. Don't print out its mtime in that
+ * case; that would be misleading since that's the mtime from the
+ * last time this dcache slot was written to.
+ */
+ mtime = 0;
+ }
+
+ afs_warn("afs: Detected corrupt dcache for file %d.%u.%u.%u: chunk %d "
+ "(offset %lu) has %d bytes, but it should have %lu bytes\n",
+ adc->f.fid.Cell,
+ adc->f.fid.Fid.Volume,
+ adc->f.fid.Fid.Vnode,
+ adc->f.fid.Fid.Unique,
+ adc->f.chunk,
+ (unsigned long)chunk_start,
+ chunk_bytes,
+ (unsigned long)expected_bytes);
+ afs_warn("afs: (dcache %p, file length %lu, DV %u, dcache mtime %u, "
+ "index %d, dflags 0x%x, mflags 0x%x, states 0x%x, vcache "
+ "states 0x%x)\n",
+ adc,
+ (unsigned long)file_length,
+ versionNo,
+ mtime,
+ adc->index,
+ (unsigned)adc->dflags,
+ (unsigned)adc->mflags,
+ (unsigned)adc->f.states,
+ avc->f.states);
+ afs_warn("afs: Ignoring the dcache for now, but this may indicate "
+ "corruption in the AFS cache, or a bug.\n");
+ }
+ return 0;
+ }
+ return 1;
+}
+
+/*!
+ * Check if a dcache is "fresh". That is, if the dcache's DV matches the DV of
+ * the vcache for that file, and the dcache looks "sane" (its length makes
+ * sense, when considering the length of the given avc).
+ *
+ * \param adc The dcache to check
+ * \param avc The vcache for adc
+ *
+ * \return 1 if the dcache is "fresh". 0 otherwise.
+ */
+int
+afs_IsDCacheFresh(struct dcache *adc, struct vcache *avc)
+{
+ if (!hsame(adc->f.versionNo, avc->f.m.DataVersion)) {
+ return 0;
+ }
+
+ /*
+ * If we've reached here, the DV in adc matches the DV of our avc. Check if
+ * the number of bytes in adc agrees with the avc file length, as a sanity
+ * check. If they don't match, we'll pretend the DVs don't match, so the
+ * bad dcache data will not be used, and we'll probably re-fetch the chunk
+ * data, replacing the bad chunk.
+ */
+
+ if (!IsDCacheSizeOK(adc, avc, adc->f.chunkBytes, avc->f.m.Length,
+ hgetlo(adc->f.versionNo), 0)) {
+ return 0;
+ }
+
+ return 1;
+}
+
/*
* afs_GetDCache
*
updateV2DC(int lockVc, struct vcache *v, struct dcache *d, int src)
{
if (!lockVc || 0 == NBObtainWriteLock(&v->lock, src)) {
- if (hsame(v->f.m.DataVersion, d->f.versionNo) && v->callback)
+ if (afs_IsDCacheFresh(d, v) && v->callback)
v->dchint = d;
if (lockVc)
ReleaseWriteLock(&v->lock);
ReleaseReadLock(&afs_xdcache);
shortcut = 1;
- if (hsame(tdc->f.versionNo, avc->f.m.DataVersion)
+ if (afs_IsDCacheFresh(tdc, avc)
&& !(tdc->dflags & DFFetching)) {
afs_stats_cmperf.dcacheHits++;
if (AFS_CHUNKTOBASE(chunk) >= avc->f.m.Length &&
#endif
#endif /* defined(AFS_AIX32_ENV) || defined(AFS_SGI_ENV) */
- !hsame(avc->f.m.DataVersion, tdc->f.versionNo))
+ !afs_IsDCacheFresh(tdc, avc))
doReallyAdjustSize = 1;
if (doReallyAdjustSize || overWriteWholeChunk) {
* avc->lock(W) if !setLocks || slowPass
* tdc->lock(S)
*/
- if (!hsame(avc->f.m.DataVersion, tdc->f.versionNo) && !overWriteWholeChunk) {
+ if (!afs_IsDCacheFresh(tdc, avc) && !overWriteWholeChunk) {
/*
* Version number mismatch.
*/
*/
/* Watch for standard race condition around osi_FlushText */
- if (hsame(avc->f.m.DataVersion, tdc->f.versionNo)) {
+ if (afs_IsDCacheFresh(tdc, avc)) {
updateV2DC(setLocks, avc, tdc, 569); /* set hint */
afs_stats_cmperf.dcacheHits++;
ConvertWToSLock(&tdc->lock);
* validPos is updated by CacheFetchProc, and can only be
* modifed under a dcache write lock, which we've blocked out
*/
+ afs_size_t length;
+
size = tdc->validPos - Position; /* actual segment size */
if (size < 0)
size = 0;
afs_CFileTruncate(file, size); /* prune it */
- } else {
+
+ /* Check that the amount of data that we fetched for the
+ * dcache makes sense. */
+ FillInt64(length, tsmall->OutStatus.Length_hi, tsmall->OutStatus.Length);
+ if (!IsDCacheSizeOK(tdc, avc, size,
+ length,
+ tsmall->OutStatus.DataVersion, 1)) {
+ code = EIO;
+ }
+ }
+ if (code) {
if (!setLocks || slowPass) {
afs_StaleVCacheFlags(avc, AFS_STALEVC_CLEARCB, CUnique);
} else {
tdc = afs_FindDCache(avc, filePos);
if (tdc) {
ObtainWriteLock(&tdc->lock, 658);
- if (!hsame(tdc->f.versionNo, avc->f.m.DataVersion)
+ if (!afs_IsDCacheFresh(tdc, avc)
|| (tdc->dflags & DFFetching)) {
ReleaseWriteLock(&tdc->lock);
afs_PutDCache(tdc);