src/WINNT/afsd/cm_buf.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 /* Copyright (C) 1994 Cazamar Systems, Inc. */
  11
  12 #include <afs/param.h>
  13 #include <afs/stds.h>
  14
  15 #include <windows.h>
  16 #include <osi.h>
  17 #include <stdio.h>
  18 #include <assert.h>
  19 #include <strsafe.h>
  20 #include <math.h>
  21
  22 #include "afsd.h"
  23 #include "cm_memmap.h"
  24
  25 #ifdef DEBUG
  26 #define TRACE_BUFFER 1
  27 #endif
  28
  29 extern void afsi_log(char *pattern, ...);
  30
  31 /* This module implements the buffer package used by the local transaction
  32  * system (cm).  It is initialized by calling cm_Init, which calls buf_Init;
   33  * it must be initialized before any of its main routines are called.
  34  *
  35  * Each buffer is hashed into a hash table by file ID and offset, and if its
  36  * reference count is zero, it is also in a free list.
  37  *
  38  * There are two locks involved in buffer processing.  The global lock
  39  * buf_globalLock protects all of the global variables defined in this module,
  40  * the reference counts and hash pointers in the actual cm_buf_t structures,
  41  * and the LRU queue pointers in the buffer structures.
  42  *
  43  * The mutexes in the buffer structures protect the remaining fields in the
   44  * buffers, as well as the data itself.
  45  *
  46  * The locking hierarchy here is this:
  47  *
  48  * - resv multiple simul. buffers reservation
  49  * - lock buffer I/O flags
  50  * - lock buffer's mutex
  51  * - lock buf_globalLock
  52  *
  53  */
  54
  55 /* global debugging log */
  56 osi_log_t *buf_logp = NULL;
  57
  58 /* Global lock protecting hash tables and free lists */
  59 osi_rwlock_t buf_globalLock;
  60
  61 /* ptr to head of the free list (most recently used) and the
  62  * tail (the guy to remove first).  We use osi_Q* functions
  63  * to put stuff in buf_freeListp, and maintain the end
  64  * pointer manually
  65  */
  66
  67 /* a pointer to a list of all buffers, just so that we can find them
  68  * easily for debugging, and for the incr syncer.  Locked under
  69  * the global lock.
  70  */
  71
  72 /* defaults setup; these variables may be manually assigned into
  73  * before calling cm_Init, as a way of changing these defaults.
  74  */
  75
  76 /* callouts for reading and writing data, etc */
  77 cm_buf_ops_t *cm_buf_opsp;
  78
  79 #ifdef DISKCACHE95
  80 /* for experimental disk caching support in Win95 client */
  81 cm_buf_t *buf_diskFreeListp;
  82 cm_buf_t *buf_diskFreeListEndp;
  83 cm_buf_t *buf_diskAllp;
  84 extern int cm_diskCacheEnabled;
  85 #endif /* DISKCACHE95 */
  86
  87 /* set this to 1 when we are terminating to prevent access attempts */
  88 static int buf_ShutdownFlag = 0;
  89
  90 #ifdef DEBUG_REFCOUNT
  91 void buf_HoldLockedDbg(cm_buf_t *bp, char *file, long line)
  92 #else
  93 void buf_HoldLocked(cm_buf_t *bp)
  94 #endif
  95 {
  96     afs_int32 refCount;
  97
  98     osi_assertx(bp->magic == CM_BUF_MAGIC,"incorrect cm_buf_t magic");
  99     refCount = InterlockedIncrement(&bp->refCount);
 100 #ifdef DEBUG_REFCOUNT
 101     osi_Log2(afsd_logp,"buf_HoldLocked bp 0x%p ref %d",bp, refCount);
 102     afsi_log("%s:%d buf_HoldLocked bp 0x%p, ref %d", file, line, bp, refCount);
 103 #endif
 104 }
 105
 106 /* hold a reference to an already held buffer */
 107 #ifdef DEBUG_REFCOUNT
 108 void buf_HoldDbg(cm_buf_t *bp, char *file, long line)
 109 #else
 110 void buf_Hold(cm_buf_t *bp)
 111 #endif
 112 {
 113     afs_int32 refCount;
 114
 115     lock_ObtainRead(&buf_globalLock);
 116     osi_assertx(bp->magic == CM_BUF_MAGIC,"incorrect cm_buf_t magic");
 117     refCount = InterlockedIncrement(&bp->refCount);
 118 #ifdef DEBUG_REFCOUNT
 119     osi_Log2(afsd_logp,"buf_Hold bp 0x%p ref %d",bp, refCount);
 120     afsi_log("%s:%d buf_Hold bp 0x%p, ref %d", file, line, bp, refCount);
 121 #endif
 122     lock_ReleaseRead(&buf_globalLock);
 123 }
 124
  125 /* Drop one reference on bp while the caller already holds buf_globalLock.
       *
       * bp          - buffer being released; its magic value is asserted.
       * writeLocked - non-zero when the caller holds buf_globalLock for write,
       *               zero when it holds it for read.  The matching
       *               lock_AssertWrite or lock_AssertRead is performed on entry.
       *
       * If the decrement takes the count to zero and the buffer is not already
       * flagged CM_BUF_INLRU, it is queued on cm_data.buf_freeListp.  A
       * read-locked caller has the lock converted to write for that update and
       * converted back to read before returning.
       *
       * With DEBUG_REFCOUNT defined the function is compiled as
       * buf_ReleaseLockedDbg and also logs the caller's file and line.
       */
  126 #ifdef DEBUG_REFCOUNT
  127 void buf_ReleaseLockedDbg(cm_buf_t *bp, afs_uint32 writeLocked, char *file, long line)
  128 #else
  129 void buf_ReleaseLocked(cm_buf_t *bp, afs_uint32 writeLocked)
  130 #endif
  131 {
  132     afs_int32 refCount;
  133
          /* verify the caller really holds the lock in the mode it claims */
  134     if (writeLocked)
  135         lock_AssertWrite(&buf_globalLock);
  136     else
  137         lock_AssertRead(&buf_globalLock);
  138
  139     /* sanity check the buffer header before touching its ref count */
  140     osi_assertx(bp->magic == CM_BUF_MAGIC,"incorrect cm_buf_t magic");
  141
  142     refCount = InterlockedDecrement(&bp->refCount);
  143 #ifdef DEBUG_REFCOUNT
  144     osi_Log3(afsd_logp,"buf_ReleaseLocked %s bp 0x%p ref %d",writeLocked?"write":"read", bp, refCount);
  145     afsi_log("%s:%d buf_ReleaseLocked %s bp 0x%p, ref %d", file, line, writeLocked?"write":"read", bp, refCount);
  146 #endif
          /* a negative result means the buffer was released more often than held */
  147 #ifdef DEBUG
  148     if (refCount < 0)
  149         osi_panic("buf refcount 0",__FILE__,__LINE__);;
  150 #else
  151     osi_assertx(refCount >= 0, "cm_buf_t refCount == 0");
  152 #endif
  153     if (refCount == 0) {
  154         /*
  155          * If we are read locked there could be a race condition
  156          * with buf_Find() so we must obtain a write lock and
  157          * double check that the refCount is actually zero
  158          * before we add the buffer to the LRU (free) queue.
  159          */
  160         if (!writeLocked)
  161             lock_ConvertRToW(&buf_globalLock);
  162
              /* re-read the live count: someone may have taken a hold meanwhile */
  163         if (bp->refCount == 0 &&
  164             !(bp->flags & CM_BUF_INLRU)) {
  165             osi_QAdd((osi_queue_t **) &cm_data.buf_freeListp, &bp->q);
  166
  167             /* watch for transition from empty to one element */
  168             if (!cm_data.buf_freeListEndp)
  169                 cm_data.buf_freeListEndp = cm_data.buf_freeListp;
  170             bp->flags |= CM_BUF_INLRU;
  171         }
  172
              /* hand the lock back in the mode the caller gave it to us */
  173         if (!writeLocked)
  174             lock_ConvertWToR(&buf_globalLock);
  175     }
  176 }
 177
 178 /* release a buffer.  Buffer must be referenced, but unlocked. */
 179 #ifdef DEBUG_REFCOUNT
 180 void buf_ReleaseDbg(cm_buf_t *bp, char *file, long line)
 181 #else
 182 void buf_Release(cm_buf_t *bp)
 183 #endif
 184 {
 185     afs_int32 refCount;
 186
 187     /* ensure that we're in the LRU queue if our ref count is 0 */
 188     osi_assertx(bp->magic == CM_BUF_MAGIC,"incorrect cm_buf_t magic");
 189
 190     refCount = InterlockedDecrement(&bp->refCount);
 191 #ifdef DEBUG_REFCOUNT
 192     osi_Log2(afsd_logp,"buf_Release bp 0x%p ref %d", bp, refCount);
 193     afsi_log("%s:%d buf_ReleaseLocked bp 0x%p, ref %d", file, line, bp, refCount);
 194 #endif
 195 #ifdef DEBUG
 196     if (refCount < 0)
 197         osi_panic("buf refcount 0",__FILE__,__LINE__);;
 198 #else
 199     osi_assertx(refCount >= 0, "cm_buf_t refCount == 0");
 200 #endif
 201     if (refCount == 0) {
 202         lock_ObtainWrite(&buf_globalLock);
 203         if (bp->refCount == 0 &&
 204             !(bp->flags & CM_BUF_INLRU)) {
 205             osi_QAdd((osi_queue_t **) &cm_data.buf_freeListp, &bp->q);
 206
 207             /* watch for transition from empty to one element */
 208             if (!cm_data.buf_freeListEndp)
 209                 cm_data.buf_freeListEndp = cm_data.buf_freeListp;
 210             bp->flags |= CM_BUF_INLRU;
 211         }
 212         lock_ReleaseWrite(&buf_globalLock);
 213     }
 214 }
 215
  216 long
  217 buf_Sync(int quitOnShutdown)
  218 {
          /*
           * Walk cm_data.buf_dirtyListp, start a write-back for every buffer
           * that is CM_BUF_DIRTY and not CM_BUF_REDIR, and unlink each buffer
           * whose CM_BUF_DIRTY flag is clear afterwards.
           *
           * quitOnShutdown - when non-zero, stop walking as soon as
           *                  buf_ShutdownFlag is set.
           *
           * Returns the OR of the dirty indications reported by
           * buf_CleanAsyncLocked(), i.e. non-zero if at least one buffer was
           * found dirty during this pass.
           *
           * bpp always addresses the link that points at bp (either the list
           * head or the previous buffer's dirtyp), and prevbp is the buffer
           * owning that link (NULL for the head).
           */
  219     cm_buf_t **bpp, *bp, *prevbp;
  220     afs_uint32 wasDirty = 0;
  221     cm_req_t req;
  222
  223     /* go through all of the dirty buffers */
  224     lock_ObtainRead(&buf_globalLock);
          /* the loop condition is an assignment: fetch the next buffer, stop on NULL */
  225     for (bpp = &cm_data.buf_dirtyListp, prevbp = NULL; bp = *bpp; ) {
  226         if (quitOnShutdown && buf_ShutdownFlag)
  227             break;
  228
              /* drop the global lock before taking the buffer mutex */
  229         lock_ReleaseRead(&buf_globalLock);
  230         /* all dirty buffers are held when they are added to the
  231         * dirty list.  No need for an additional hold.
  232         */
  233         lock_ObtainMutex(&bp->mx);
  234
  235         if (bp->flags & CM_BUF_DIRTY && !(bp->flags & CM_BUF_REDIR)) {
  236             /* start cleaning the buffer; don't touch log pages since
  237             * the log code counts on knowing exactly who is writing
  238             * a log page at any given instant.
  239             */
  240             afs_uint32 dirty;
  241
                  /* CM_REQ_NORETRY makes buf_CleanAsyncLocked give up after one attempt */
  242             cm_InitReq(&req);
  243             req.flags |= CM_REQ_NORETRY;
  244             buf_CleanAsyncLocked(bp, &req, &dirty);
  245             wasDirty |= dirty;
  246         }
  247
  248         /* the buffer may or may not have been dirty
  249         * and if dirty may or may not have been cleaned
  250         * successfully.  check the dirty flag again.
  251         */
  252         if (!(bp->flags & CM_BUF_DIRTY)) {
  253             /* remove the buffer from the dirty list */
  254             lock_ObtainWrite(&buf_globalLock);
  255 #ifdef DEBUG_REFCOUNT
  256             if (bp->dirtyp == NULL && bp != cm_data.buf_dirtyListEndp) {
  257                 osi_Log1(afsd_logp,"buf_IncrSyncer bp 0x%p list corruption",bp);
  258                 afsi_log("buf_IncrSyncer bp 0x%p list corruption", bp);
  259             }
  260 #endif
                  /* unlink bp; *bpp now refers to its successor, so bpp is not advanced */
  261             *bpp = bp->dirtyp;
  262             bp->dirtyp = NULL;
  263             bp->flags &= ~CM_BUF_INDL;
                  /* keep the tail pointer consistent with the shortened list */
  264             if (cm_data.buf_dirtyListp == NULL)
  265                 cm_data.buf_dirtyListEndp = NULL;
  266             else if (cm_data.buf_dirtyListEndp == bp)
  267                 cm_data.buf_dirtyListEndp = prevbp;
                  /* give up the reference that was taken when bp joined the dirty list */
  268             buf_ReleaseLocked(bp, TRUE);
                  /* the loop expects the global lock to be read-held at the top */
  269             lock_ConvertWToR(&buf_globalLock);
  270         } else {
  271             /* advance the pointer so we don't loop forever */
  272             lock_ObtainRead(&buf_globalLock);
  273             bpp = &bp->dirtyp;
  274             prevbp = bp;
  275         }
              /* NOTE(review): on the removal path the mutex is released after the
               * list's reference has been dropped - confirm bp cannot be reused
               * in between. */
  276         lock_ReleaseMutex(&bp->mx);
  277     }   /* for loop over a bunch of buffers */
  278     lock_ReleaseRead(&buf_globalLock);
  279
  280     return wasDirty;
  281 }
 282
 283 /* incremental sync daemon.  Writes all dirty buffers every 5000 ms */
 284 void buf_IncrSyncer(long parm)
 285 {
 286     long wasDirty = 0;
 287     long i;
 288
 289     while (buf_ShutdownFlag == 0) {
 290
 291         if (!wasDirty) {
 292             i = SleepEx(5000, 1);
 293             if (i != 0)
 294                 continue;
 295         } else {
 296             Sleep(50);
 297         }
 298
 299         wasDirty = buf_Sync(1);
 300     } /* whole daemon's while loop */
 301 }
 302
 303 long
 304 buf_ValidateBuffers(void)
 305 {
 306     cm_buf_t * bp, *bpf, *bpa, *bpb;
 307     afs_uint64 countb = 0, countf = 0, counta = 0;
 308
 309     if (cm_data.buf_freeListp == NULL && cm_data.buf_freeListEndp != NULL ||
 310          cm_data.buf_freeListp != NULL && cm_data.buf_freeListEndp == NULL) {
 311         afsi_log("cm_ValidateBuffers failure: inconsistent free list pointers");
 312         fprintf(stderr, "cm_ValidateBuffers failure: inconsistent free list pointers\n");
 313         return -9;
 314     }
 315
 316     for (bp = cm_data.buf_freeListEndp; bp; bp=(cm_buf_t *) osi_QPrev(&bp->q)) {
 317         if (bp->magic != CM_BUF_MAGIC) {
 318             afsi_log("cm_ValidateBuffers failure: bp->magic != CM_BUF_MAGIC");
 319             fprintf(stderr, "cm_ValidateBuffers failure: bp->magic != CM_BUF_MAGIC\n");
 320             return -1;
 321         }
 322         countb++;
 323         bpb = bp;
 324
 325         if (countb > cm_data.buf_nbuffers) {
 326             afsi_log("cm_ValidateBuffers failure: countb > cm_data.buf_nbuffers");
 327             fprintf(stderr, "cm_ValidateBuffers failure: countb > cm_data.buf_nbuffers\n");
 328             return -6;
 329         }
 330     }
 331
 332     for (bp = cm_data.buf_freeListp; bp; bp=(cm_buf_t *) osi_QNext(&bp->q)) {
 333         if (bp->magic != CM_BUF_MAGIC) {
 334             afsi_log("cm_ValidateBuffers failure: bp->magic != CM_BUF_MAGIC");
 335             fprintf(stderr, "cm_ValidateBuffers failure: bp->magic != CM_BUF_MAGIC\n");
 336             return -2;
 337         }
 338         countf++;
 339         bpf = bp;
 340
 341         if (countf > cm_data.buf_nbuffers) {
 342             afsi_log("cm_ValidateBuffers failure: countf > cm_data.buf_nbuffers");
 343             fprintf(stderr, "cm_ValidateBuffers failure: countf > cm_data.buf_nbuffers\n");
 344             return -7;
 345         }
 346     }
 347
 348     for (bp = cm_data.buf_allp; bp; bp=bp->allp) {
 349         if (bp->magic != CM_BUF_MAGIC) {
 350             afsi_log("cm_ValidateBuffers failure: bp->magic != CM_BUF_MAGIC");
 351             fprintf(stderr, "cm_ValidateBuffers failure: bp->magic != CM_BUF_MAGIC\n");
 352             return -3;
 353         }
 354         counta++;
 355         bpa = bp;
 356
 357         if (counta > cm_data.buf_nbuffers) {
 358             afsi_log("cm_ValidateBuffers failure: counta > cm_data.buf_nbuffers");
 359             fprintf(stderr, "cm_ValidateBuffers failure: counta > cm_data.buf_nbuffers\n");
 360             return -8;
 361         }
 362     }
 363
 364     if (countb != countf) {
 365         afsi_log("cm_ValidateBuffers failure: countb != countf");
 366         fprintf(stderr, "cm_ValidateBuffers failure: countb != countf\n");
 367         return -4;
 368     }
 369
 370     if (counta != cm_data.buf_nbuffers) {
 371         afsi_log("cm_ValidateBuffers failure: counta != cm_data.buf_nbuffers");
 372         fprintf(stderr, "cm_ValidateBuffers failure: counta != cm_data.buf_nbuffers\n");
 373         return -5;
 374     }
 375
 376     return 0;
 377 }
 378
 379 void buf_Shutdown(void)
 380 {
 381     /* disable the buf_IncrSyncer() threads */
 382     buf_ShutdownFlag = 1;
 383
 384     /* then force all dirty buffers to the file servers */
 385     buf_Sync(0);
 386 }
 387
 388 /* initialize the buffer package; called with no locks
 389  * held during the initialization phase.
 390  */
 391 long buf_Init(int newFile, cm_buf_ops_t *opsp, afs_uint64 nbuffers)
 392 {
 393     static osi_once_t once;
 394     cm_buf_t *bp;
 395     thread_t phandle;
 396     long i;
 397     unsigned long pid;
 398     char *data;
 399
 400     if ( newFile ) {
 401         if (nbuffers)
 402             cm_data.buf_nbuffers = nbuffers;
 403
 404         /* Have to be able to reserve a whole chunk */
 405         if (((cm_data.buf_nbuffers - 3) * cm_data.buf_blockSize) < cm_chunkSize)
 406             return CM_ERROR_TOOFEWBUFS;
 407     }
 408
 409     /* recall for callouts */
 410     cm_buf_opsp = opsp;
 411
 412     if (osi_Once(&once)) {
 413         /* initialize global locks */
 414         lock_InitializeRWLock(&buf_globalLock, "Global buffer lock", LOCK_HIERARCHY_BUF_GLOBAL);
 415
 416         if ( newFile ) {
 417             /* remember this for those who want to reset it */
 418             cm_data.buf_nOrigBuffers = cm_data.buf_nbuffers;
 419
 420             /* lower hash size to a prime number */
 421             cm_data.buf_hashSize = osi_PrimeLessThan((afs_uint32)(cm_data.buf_nbuffers/7 + 1));
 422
 423             /* create hash table */
 424             memset((void *)cm_data.buf_scacheHashTablepp, 0, cm_data.buf_hashSize * sizeof(cm_buf_t *));
 425
 426             /* another hash table */
 427             memset((void *)cm_data.buf_fileHashTablepp, 0, cm_data.buf_hashSize * sizeof(cm_buf_t *));
 428
 429             /* create buffer headers and put in free list */
 430             bp = cm_data.bufHeaderBaseAddress;
 431             data = cm_data.bufDataBaseAddress;
 432             cm_data.buf_allp = NULL;
 433
 434             for (i=0; i<cm_data.buf_nbuffers; i++) {
 435                 osi_assertx(bp >= cm_data.bufHeaderBaseAddress && bp < (cm_buf_t *)cm_data.bufDataBaseAddress,
 436                             "invalid cm_buf_t address");
 437                 osi_assertx(data >= cm_data.bufDataBaseAddress && data < cm_data.bufEndOfData,
 438                             "invalid cm_buf_t data address");
 439
 440                 /* allocate and zero some storage */
 441                 memset(bp, 0, sizeof(cm_buf_t));
 442                 bp->magic = CM_BUF_MAGIC;
 443                 /* thread on list of all buffers */
 444                 bp->allp = cm_data.buf_allp;
 445                 cm_data.buf_allp = bp;
 446
 447                 osi_QAdd((osi_queue_t **)&cm_data.buf_freeListp, &bp->q);
 448                 bp->flags |= CM_BUF_INLRU;
 449                 lock_InitializeMutex(&bp->mx, "Buffer mutex", LOCK_HIERARCHY_BUFFER);
 450
 451                 /* grab appropriate number of bytes from aligned zone */
 452                 bp->datap = data;
 453
 454                 /* setup last buffer pointer */
 455                 if (i == 0)
 456                     cm_data.buf_freeListEndp = bp;
 457
 458                 /* next */
 459                 bp++;
 460                 data += cm_data.buf_blockSize;
 461             }
 462
 463             /* none reserved at first */
 464             cm_data.buf_reservedBufs = 0;
 465
 466             /* just for safety's sake */
 467             cm_data.buf_maxReservedBufs = cm_data.buf_nbuffers - 3;
 468         } else {
 469             bp = cm_data.bufHeaderBaseAddress;
 470             data = cm_data.bufDataBaseAddress;
 471
 472             for (i=0; i<cm_data.buf_nbuffers; i++) {
 473                 lock_InitializeMutex(&bp->mx, "Buffer mutex", LOCK_HIERARCHY_BUFFER);
 474                 bp->userp = NULL;
 475                 bp->waitCount = 0;
 476                 bp->waitRequests = 0;
 477                 bp->flags &= ~CM_BUF_WAITING;
 478                 bp++;
 479             }
 480         }
 481
 482 #ifdef TESTING
 483         buf_ValidateBufQueues();
 484 #endif /* TESTING */
 485
 486 #ifdef TRACE_BUFFER
 487         /* init the buffer trace log */
 488         buf_logp = osi_LogCreate("buffer", 1000);
 489         osi_LogEnable(buf_logp);
 490 #endif
 491
 492         osi_EndOnce(&once);
 493
 494         /* and create the incr-syncer */
 495         phandle = thrd_Create(0, 0,
 496                                (ThreadFunc) buf_IncrSyncer, 0, 0, &pid,
 497                                "buf_IncrSyncer");
 498
 499         osi_assertx(phandle != NULL, "buf: can't create incremental sync proc");
 500         CloseHandle(phandle);
 501     }
 502
 503 #ifdef TESTING
 504     buf_ValidateBufQueues();
 505 #endif /* TESTING */
 506     return 0;
 507 }
 508
 509 /* add nbuffers to the buffer pool, if possible.
 510  * Called with no locks held.
 511  */
 512 long buf_AddBuffers(afs_uint64 nbuffers)
 513 {
 514     /* The size of a virtual cache cannot be changed after it has
 515      * been created.  Subsequent calls to MapViewofFile() with
 516      * an existing mapping object name would not allow the
 517      * object to be resized.  Return failure immediately.
 518      *
 519      * A similar problem now occurs with the persistent cache
 520      * given that the memory mapped file now contains a complex
 521      * data structure.
 522      */
 523     afsi_log("request to add %d buffers to the existing cache of size %d denied",
 524               nbuffers, cm_data.buf_nbuffers);
 525
 526     return CM_ERROR_INVAL;
 527 }
 528
 529 /* interface to set the number of buffers to an exact figure.
 530  * Called with no locks held.
 531  */
 532 long buf_SetNBuffers(afs_uint64 nbuffers)
 533 {
 534     if (nbuffers < 10)
 535         return CM_ERROR_INVAL;
 536     if (nbuffers == cm_data.buf_nbuffers)
 537         return 0;
 538     else if (nbuffers > cm_data.buf_nbuffers)
 539         return buf_AddBuffers(nbuffers - cm_data.buf_nbuffers);
 540     else
 541         return CM_ERROR_INVAL;
 542 }
 543
  544 /* Wait for reading or writing to clear; called with write-locked
  545  * buffer and unlocked scp and returns with locked buffer.
       *
       * scp - status cache entry for the buffer's file, or NULL.  When NULL it
       *       is looked up from bp->fid after the first wakeup; a reference
       *       obtained that way is released before returning.
       * bp  - buffer to wait on.
       *
       * The function loops until neither CM_BUF_READING nor CM_BUF_WRITING is
       * set in bp->flags.  Waiters are counted in bp->waitCount, and
       * CM_BUF_WAITING stays set while that count is non-zero.
  546  */
  547 void buf_WaitIO(cm_scache_t * scp, cm_buf_t *bp)
  548 {
          /* set when we looked the scp up ourselves and so own a reference */
  549     int release = 0;
  550
  551     if (scp)
  552         osi_assertx(scp->magic == CM_SCACHE_MAGIC, "invalid cm_scache_t magic");
  553     osi_assertx(bp->magic == CM_BUF_MAGIC, "invalid cm_buf_t magic");
  554
  555     while (1) {
  556         /* if no IO is happening, we're done */
  557         if (!(bp->flags & (CM_BUF_READING | CM_BUF_WRITING)))
  558             break;
  559
  560         /* I/O is in progress, so register as a waiter.  If some other
  561          * thread is already waiting just bump the counters; otherwise
  562          * set CM_BUF_WAITING and start both counters at one.
  563          */
  564         if ( bp->flags & CM_BUF_WAITING ) {
  565             bp->waitCount++;
  566             bp->waitRequests++;
  567             osi_Log1(buf_logp, "buf_WaitIO CM_BUF_WAITING already set for 0x%p", bp);
  568         } else {
  569             osi_Log1(buf_logp, "buf_WaitIO CM_BUF_WAITING set for 0x%p", bp);
  570             bp->flags |= CM_BUF_WAITING;
  571             bp->waitCount = bp->waitRequests = 1;
  572         }
              /* sleep on the buffer address; presumably osi_SleepM drops bp->mx,
               * since the mutex is obtained again below - confirm */
  573         osi_SleepM((LONG_PTR)bp, &bp->mx);
  574
  575         smb_UpdateServerPriority();
  576
  577         lock_ObtainMutex(&bp->mx);
  578         osi_Log1(buf_logp, "buf_WaitIO conflict wait done for 0x%p", bp);
              /* the last waiter to leave clears the waiting state */
  579         bp->waitCount--;
  580         if (bp->waitCount == 0) {
  581             osi_Log1(buf_logp, "buf_WaitIO CM_BUF_WAITING reset for 0x%p", bp);
  582             bp->flags &= ~CM_BUF_WAITING;
  583             bp->waitRequests = 0;
  584         }
  585
              /* no scp supplied: try to find it (assignment in the condition is
               * intentional) and remember that we must release it */
  586         if ( !scp ) {
  587             if (scp = cm_FindSCache(&bp->fid))
  588                  release = 1;
  589         }
              /* pass the wakeup on to anyone sleeping on the scp's flags */
  590         if ( scp ) {
  591             lock_ObtainRead(&scp->rw);
  592             if (scp->flags & CM_SCACHEFLAG_WAITING) {
  593                 osi_Log1(buf_logp, "buf_WaitIO waking scp 0x%p", scp);
  594                 osi_Wakeup((LONG_PTR)&scp->flags);
  595             }
  596             lock_ReleaseRead(&scp->rw);
  597         }
  598     }
  599
  600     /* if we get here, the IO is done, but we may have to wakeup people waiting for
  601      * the I/O to complete.  Do so.
  602      */
  603     if (bp->flags & CM_BUF_WAITING) {
  604         osi_Log1(buf_logp, "buf_WaitIO Waking bp 0x%p", bp);
  605         osi_Wakeup((LONG_PTR) bp);
  606     }
  607     osi_Log1(buf_logp, "WaitIO finished wait for bp 0x%p", bp);
  608
          /* drop the reference taken by cm_FindSCache above, if any */
  609     if (scp && release)
  610         cm_ReleaseSCache(scp);
  611 }
 612
 613 /* find a buffer, if any, for a particular file ID and offset.  Assumes
 614  * that buf_globalLock is write locked when called.
 615  */
 616 cm_buf_t *buf_FindLocked(struct cm_scache *scp, osi_hyper_t *offsetp)
 617 {
 618     afs_uint32 i;
 619     cm_buf_t *bp;
 620
 621     i = BUF_HASH(&scp->fid, offsetp);
 622     for(bp = cm_data.buf_scacheHashTablepp[i]; bp; bp=bp->hashp) {
 623         if (cm_FidCmp(&scp->fid, &bp->fid) == 0
 624              && offsetp->LowPart == bp->offset.LowPart
 625              && offsetp->HighPart == bp->offset.HighPart) {
 626             buf_HoldLocked(bp);
 627             break;
 628         }
 629     }
 630
 631     /* return whatever we found, if anything */
 632     return bp;
 633 }
 634
 635 /* find a buffer with offset *offsetp for vnode *scp.  Called
 636  * with no locks held.
 637  */
 638 cm_buf_t *buf_Find(struct cm_scache *scp, osi_hyper_t *offsetp)
 639 {
 640     cm_buf_t *bp;
 641
 642     lock_ObtainRead(&buf_globalLock);
 643     bp = buf_FindLocked(scp, offsetp);
 644     lock_ReleaseRead(&buf_globalLock);
 645
 646     return bp;
 647 }
 648
  649 /* start cleaning I/O on this buffer.  Buffer must be write locked, and is returned
  650  * write-locked.
  651  *
  652  * Makes sure that there's only one person writing this block
  653  * at any given time, and also ensures that the log is forced sufficiently far,
  654  * if this buffer contains logged data.
  655  *
       * bp       - buffer to clean; bp->mx is dropped around each write attempt
       *            and obtained again afterwards.
       * reqp     - request state passed to the Writep callout; if its flags
       *            contain CM_REQ_NORETRY only one attempt is made.
       * pisdirty - optional out parameter; when non-NULL it receives 1 if the
       *            buffer was found dirty on entry to the loop, else 0.
       *
  656  * Returns the code of the last write attempt (CM_ERROR_NOSUCHFILE when the
       * scache entry could not be found), or 0 if the buffer was not dirty.
  657  */
  658 afs_uint32 buf_CleanAsyncLocked(cm_buf_t *bp, cm_req_t *reqp, afs_uint32 *pisdirty)
  659 {
  660     afs_uint32 code = 0;
  661     afs_uint32 isdirty = 0;
  662     cm_scache_t * scp = NULL;
  663     osi_hyper_t offset;
  664
  665     osi_assertx(bp->magic == CM_BUF_MAGIC, "invalid cm_buf_t magic");
  666
          /* keep trying until the dirty flag clears or one of the exits below is hit */
  667     while ((bp->flags & CM_BUF_DIRTY) == CM_BUF_DIRTY) {
  668         isdirty = 1;
              /* the write is performed without holding the buffer mutex */
  669         lock_ReleaseMutex(&bp->mx);
  670
  671         scp = cm_FindSCache(&bp->fid);
  672         if (scp) {
  673             osi_Log2(buf_logp, "buf_CleanAsyncLocked starts I/O on scp 0x%p buf 0x%p", scp, bp);
  674
                  /* NOTE(review): the value of LargeIntegerAdd() is discarded here;
                   * if it returns the sum rather than updating its first argument,
                   * offset stays equal to bp->offset - confirm which is intended. */
  675             offset = bp->offset;
  676             LargeIntegerAdd(offset, ConvertLongToLargeInteger(bp->dirty_offset));
  677             code = (*cm_buf_opsp->Writep)(scp, &offset,
  678 #if 1
  679                                            /* we might as well try to write all of the contiguous
  680                                             * dirty buffers in one RPC
  681                                             */
  682                                            cm_chunkSize,
  683 #else
  684                                           bp->dirty_length,
  685 #endif
  686                                           0, bp->userp, reqp);
  687             osi_Log3(buf_logp, "buf_CleanAsyncLocked I/O on scp 0x%p buf 0x%p, done=%d", scp, bp, code);
  688
  689             cm_ReleaseSCache(scp);
  690             scp = NULL;
  691         } else {
  692             osi_Log1(buf_logp, "buf_CleanAsyncLocked unable to start I/O - scp not found buf 0x%p", bp);
  693             code = CM_ERROR_NOSUCHFILE;
  694         }
  695
  696         lock_ObtainMutex(&bp->mx);
  697         /* if the Write routine returns No Such File (or one of the other
  698          * errors listed below), clear the dirty flag because we aren't
  699          * going to be able to write this data to the file server.  The
  700          * buffer is marked CM_BUF_ERROR and its data version invalidated.
               */
  701         if (code == CM_ERROR_NOSUCHFILE || code == CM_ERROR_BADFD || code == CM_ERROR_NOACCESS ||
  702             code == CM_ERROR_QUOTA || code == CM_ERROR_SPACE || code == CM_ERROR_TOOBIG ||
  703             code == CM_ERROR_READONLY || code == CM_ERROR_NOSUCHPATH){
  704             bp->flags &= ~CM_BUF_DIRTY;
  705             bp->flags |= CM_BUF_ERROR;
  706             bp->dirty_offset = 0;
  707             bp->dirty_length = 0;
  708             bp->error = code;
  709             bp->dataVersion = CM_BUF_VERSION_BAD;
  710             bp->dirtyCounter++;
  711             break;
  712         }
  713
  714 #ifdef DISKCACHE95
  715         /* Disk cache support */
  716         /* write buffer to disk cache (synchronous for now) */
  717         diskcache_Update(bp->dcp, bp->datap, cm_data.buf_blockSize, bp->dataVersion);
  718 #endif /* DISKCACHE95 */
  719
  720         /* if we get here and retries are not permitted
  721          * then we need to exit this loop regardless of
  722          * whether or not we were able to clear the dirty bit
  723          */
  724         if (reqp->flags & CM_REQ_NORETRY)
  725             break;
  726
  727         /* Ditto if the hardDeadTimeout or idleTimeout was reached */
  728         if (code == CM_ERROR_TIMEDOUT || code == CM_ERROR_ALLDOWN ||
  729             code == CM_ERROR_ALLBUSY || code == CM_ERROR_ALLOFFLINE ||
  730             code == CM_ERROR_CLOCKSKEW) {
  731             break;
  732         }
  733     }
  734
  735     /* if someone was waiting for the I/O that just completed or failed,
  736      * wake them up.
  737      */
  738     if (bp->flags & CM_BUF_WAITING) {
  739         /* wake up sleepers on this buffer; the flag itself is left alone here */
  740         osi_Log1(buf_logp, "buf_WaitIO Waking bp 0x%p", bp);
  741         osi_Wakeup((LONG_PTR) bp);
  742     }
  743
          /* report whether the buffer was dirty, if the caller asked */
  744     if (pisdirty)
  745         *pisdirty = isdirty;
  746
  747     return code;
  748 }
 749
 750 /* Called with a zero-ref count buffer and with the buf_globalLock write locked.
 751  * recycles the buffer, and leaves it ready for reuse with a ref count of 1.
 752  * The buffer must already be clean, and no I/O should be happening to it.
 753  */
 754 void buf_Recycle(cm_buf_t *bp)
 755 {
 756     afs_uint32 i;
 757     cm_buf_t **lbpp;
 758     cm_buf_t *tbp;
 759     cm_buf_t *prevBp, *nextBp;
 760
 761     osi_assertx(bp->magic == CM_BUF_MAGIC, "invalid cm_buf_t magic");
 762
 763     /* if we get here, we know that the buffer still has a 0 ref count,
 764      * and that it is clean and has no currently pending I/O.  This is
 765      * the dude to return.
 766      * Remember that as long as the ref count is 0, we know that we won't
 767      * have any lock conflicts, so we can grab the buffer lock out of
 768      * order in the locking hierarchy.
 769      */
 770     osi_Log3( buf_logp, "buf_Recycle recycles 0x%p, off 0x%x:%08x",
 771               bp, bp->offset.HighPart, bp->offset.LowPart);
 772
 773     osi_assertx(bp->refCount == 0, "cm_buf_t refcount != 0");
 774     osi_assertx(!(bp->flags & (CM_BUF_READING | CM_BUF_WRITING | CM_BUF_DIRTY)),
 775                 "incorrect cm_buf_t flags");
 776     lock_AssertWrite(&buf_globalLock);
 777
 778     if (bp->flags & CM_BUF_INHASH) {
 779         /* Remove from hash */
 780
 781         i = BUF_HASH(&bp->fid, &bp->offset);
 782         lbpp = &(cm_data.buf_scacheHashTablepp[i]);
 783         for(tbp = *lbpp; tbp; lbpp = &tbp->hashp, tbp = *lbpp) {
 784             if (tbp == bp)
 785                 break;
 786         }
 787
 788         /* we better find it */
 789         osi_assertx(tbp != NULL, "buf_Recycle: hash table screwup");
 790
 791         *lbpp = bp->hashp;      /* hash out */
 792         bp->hashp = NULL;
 793
 794         /* Remove from file hash */
 795
 796         i = BUF_FILEHASH(&bp->fid);
 797         prevBp = bp->fileHashBackp;
 798         bp->fileHashBackp = NULL;
 799         nextBp = bp->fileHashp;
 800         bp->fileHashp = NULL;
 801         if (prevBp)
 802             prevBp->fileHashp = nextBp;
 803         else
 804             cm_data.buf_fileHashTablepp[i] = nextBp;
 805         if (nextBp)
 806             nextBp->fileHashBackp = prevBp;
 807
 808         bp->flags &= ~CM_BUF_INHASH;
 809     }
 810
 811     /* make the fid unrecognizable */
 812     memset(&bp->fid, 0, sizeof(cm_fid_t));
 813 }
 814
 815 /* recycle a buffer, removing it from the free list, hashing in its new identity
 816  * and returning it write-locked so that no one can use it.  Called without
 817  * any locks held, and can return an error if it loses the race condition and
 818  * finds that someone else created the desired buffer.
 819  *
 820  * If success is returned, the buffer is returned write-locked.
 821  *
 822  * May be called with null scp and offsetp, if we're just trying to reclaim some
 823  * space from the buffer pool.  In that case, the buffer will be returned
 824  * without being hashed into the hash table.
 825  */
 826 long buf_GetNewLocked(struct cm_scache *scp, osi_hyper_t *offsetp, cm_buf_t **bufpp)
 827 {
 828     cm_buf_t *bp;       /* buffer we're dealing with */
 829     cm_buf_t *nextBp;   /* next buffer in file hash chain */
 830     afs_uint32 i;       /* temp */
 831     cm_req_t req;
 832
 833     cm_InitReq(&req);   /* just in case */
 834
 835 #ifdef TESTING
 836     buf_ValidateBufQueues();
 837 #endif /* TESTING */
 838
 839     while(1) {
 840       retry:
 841         lock_ObtainRead(&scp->bufCreateLock);
 842         lock_ObtainWrite(&buf_globalLock);
 843         /* check to see if we lost the race */
 844         if (scp) {
 845             if (bp = buf_FindLocked(scp, offsetp)) {
 846                 /* Do not call buf_ReleaseLocked() because we
 847                  * do not want to allow the buffer to be added
 848                  * to the free list.
 849                  */
 850                 afs_int32 refCount = InterlockedDecrement(&bp->refCount);
 851 #ifdef DEBUG_REFCOUNT
 852                 osi_Log2(afsd_logp,"buf_GetNewLocked bp 0x%p ref %d", bp, refCount);
 853                 afsi_log("%s:%d buf_GetNewLocked bp 0x%p, ref %d", __FILE__, __LINE__, bp, refCount);
 854 #endif
 855                 lock_ReleaseWrite(&buf_globalLock);
 856                 lock_ReleaseRead(&scp->bufCreateLock);
 857                 return CM_BUF_EXISTS;
 858             }
 859         }
 860
 861         /* does this fix the problem below?  it's a simple solution. */
 862         if (!cm_data.buf_freeListEndp)
 863         {
 864             lock_ReleaseWrite(&buf_globalLock);
 865             lock_ReleaseRead(&scp->bufCreateLock);
 866             osi_Log0(afsd_logp, "buf_GetNewLocked: Free Buffer List is empty - sleeping 200ms");
 867             Sleep(200);
 868             goto retry;
 869         }
 870
 871         /* for debugging, assert free list isn't empty, although we
 872          * really should try waiting for a running tranasction to finish
 873          * instead of this; or better, we should have a transaction
 874          * throttler prevent us from entering this situation.
 875          */
 876         osi_assertx(cm_data.buf_freeListEndp != NULL, "buf_GetNewLocked: no free buffers");
 877
 878         /* look at all buffers in free list, some of which may temp.
 879          * have high refcounts and which then should be skipped,
 880          * starting cleaning I/O for those which are dirty.  If we find
 881          * a clean buffer, we rehash it, lock it and return it.
 882          */
 883         for(bp = cm_data.buf_freeListEndp; bp; bp=(cm_buf_t *) osi_QPrev(&bp->q)) {
 884             /* check to see if it really has zero ref count.  This
 885              * code can bump refcounts, at least, so it may not be
 886              * zero.
 887              */
 888             if (bp->refCount > 0)
 889                 continue;
 890
 891             /* we don't have to lock buffer itself, since the ref
 892              * count is 0 and we know it will stay zero as long as
 893              * we hold the global lock.
 894              */
 895
 896             /* Don't recycle a buffer held by the redirector. */
 897             if (bp->flags & CM_BUF_REDIR)
 898                 continue;
 899
 900             /* don't recycle someone in our own chunk */
 901             if (!cm_FidCmp(&bp->fid, &scp->fid)
 902                  && (bp->offset.LowPart & (-cm_chunkSize))
 903                  == (offsetp->LowPart & (-cm_chunkSize)))
 904                 continue;
 905
 906             /* if this page is being filled (!) or cleaned, see if
 907              * the I/O has completed.  If not, skip it, otherwise
 908              * do the final processing for the I/O.
 909              */
 910             if (bp->flags & (CM_BUF_READING | CM_BUF_WRITING)) {
 911                 /* probably shouldn't do this much work while
 912                  * holding the big lock?  Watch for contention
 913                  * here.
 914                  */
 915                 continue;
 916             }
 917
 918             if (bp->flags & CM_BUF_DIRTY) {
 919                 /* if the buffer is dirty, start cleaning it and
 920                  * move on to the next buffer.  We do this with
 921                  * just the lock required to minimize contention
 922                  * on the big lock.
 923                  */
 924                 buf_HoldLocked(bp);
 925                 lock_ReleaseWrite(&buf_globalLock);
 926                 lock_ReleaseRead(&scp->bufCreateLock);
 927
 928                 /* grab required lock and clean; this only
 929                  * starts the I/O.  By the time we're back,
 930                  * it'll still be marked dirty, but it will also
 931                  * have the WRITING flag set, so we won't get
 932                  * back here.
 933                  */
 934                 buf_CleanAsync(bp, &req, NULL);
 935
 936                 /* now put it back and go around again */
 937                 buf_Release(bp);
 938                 goto retry;
 939             }
 940
 941             /* if we get here, we know that the buffer still has a 0
 942              * ref count, and that it is clean and has no currently
 943              * pending I/O.  This is the dude to return.
 944              * Remember that as long as the ref count is 0, we know
 945              * that we won't have any lock conflicts, so we can grab
 946              * the buffer lock out of order in the locking hierarchy.
 947              */
 948             buf_Recycle(bp);
 949
 950             /* clean up junk flags */
 951             bp->flags &= ~(CM_BUF_EOF | CM_BUF_ERROR);
 952             bp->dataVersion = CM_BUF_VERSION_BAD;       /* unknown so far */
 953
 954             /* now hash in as our new buffer, and give it the
 955              * appropriate label, if requested.
 956              */
 957             if (scp) {
 958                 bp->flags |= CM_BUF_INHASH;
 959                 bp->fid = scp->fid;
 960 #ifdef DEBUG
 961                 bp->scp = scp;
 962 #endif
 963                 bp->offset = *offsetp;
 964                 i = BUF_HASH(&scp->fid, offsetp);
 965                 bp->hashp = cm_data.buf_scacheHashTablepp[i];
 966                 cm_data.buf_scacheHashTablepp[i] = bp;
 967                 i = BUF_FILEHASH(&scp->fid);
 968                 nextBp = cm_data.buf_fileHashTablepp[i];
 969                 bp->fileHashp = nextBp;
 970                 bp->fileHashBackp = NULL;
 971                 if (nextBp)
 972                     nextBp->fileHashBackp = bp;
 973                 cm_data.buf_fileHashTablepp[i] = bp;
 974             }
 975
 976             /* we should move it from the lru queue.  It better still be there,
 977              * since we've held the global (big) lock since we found it there.
 978              */
 979             osi_assertx(bp->flags & CM_BUF_INLRU,
 980                          "buf_GetNewLocked: LRU screwup");
 981
 982             if (cm_data.buf_freeListEndp == bp) {
 983                 /* we're the last guy in this queue, so maintain it */
 984                 cm_data.buf_freeListEndp = (cm_buf_t *) osi_QPrev(&bp->q);
 985             }
 986             osi_QRemove((osi_queue_t **) &cm_data.buf_freeListp, &bp->q);
 987             bp->flags &= ~CM_BUF_INLRU;
 988
 989             /* prepare to return it.  Give it a refcount */
 990             bp->refCount = 1;
 991 #ifdef DEBUG_REFCOUNT
 992             osi_Log2(afsd_logp,"buf_GetNewLocked bp 0x%p ref %d", bp, 1);
 993             afsi_log("%s:%d buf_GetNewLocked bp 0x%p, ref %d", __FILE__, __LINE__, bp, 1);
 994 #endif
 995             /* grab the mutex so that people don't use it
 996              * before the caller fills it with data.  Again, no one
 997              * should have been able to get to this dude to lock it.
 998              */
 999             if (!lock_TryMutex(&bp->mx)) {
1000                 osi_Log2(afsd_logp, "buf_GetNewLocked bp 0x%p cannot be mutex locked.  refCount %d should be 0",
1001                          bp, bp->refCount);
1002                 osi_panic("buf_GetNewLocked: TryMutex failed",__FILE__,__LINE__);
1003             }
1004
1005             lock_ReleaseWrite(&buf_globalLock);
1006             lock_ReleaseRead(&scp->bufCreateLock);
1007
1008             *bufpp = bp;
1009
1010 #ifdef TESTING
1011             buf_ValidateBufQueues();
1012 #endif /* TESTING */
1013             return 0;
1014         } /* for all buffers in lru queue */
1015         lock_ReleaseWrite(&buf_globalLock);
1016         lock_ReleaseRead(&scp->bufCreateLock);
1017         osi_Log0(afsd_logp, "buf_GetNewLocked: Free Buffer List has no buffers with a zero refcount - sleeping 100ms");
1018         Sleep(100);             /* give some time for a buffer to be freed */
1019     }   /* while loop over everything */
1020     /* not reached */
1021 } /* the proc */
1022
1023 /* get a page, returning it held but unlocked.  Doesn't fill in the page
1024  * with I/O, since we're going to write the whole thing new.
1025  */
1026 long buf_GetNew(struct cm_scache *scp, osi_hyper_t *offsetp, cm_buf_t **bufpp)
1027 {
1028     cm_buf_t *bp;
1029     long code;
1030     osi_hyper_t pageOffset;
1031     int created;
1032
1033     created = 0;
1034     pageOffset.HighPart = offsetp->HighPart;
1035     pageOffset.LowPart = offsetp->LowPart & ~(cm_data.buf_blockSize-1);
1036     while (1) {
1037         bp = buf_Find(scp, &pageOffset);
1038         if (bp) {
1039             /* lock it and break out */
1040             lock_ObtainMutex(&bp->mx);
1041             break;
1042         }
1043
1044         /* otherwise, we have to create a page */
1045         code = buf_GetNewLocked(scp, &pageOffset, &bp);
1046
1047         /* check if the buffer was created in a race condition branch.
1048          * If so, go around so we can hold a reference to it.
1049          */
1050         if (code == CM_BUF_EXISTS)
1051             continue;
1052
1053         /* something else went wrong */
1054         if (code != 0)
1055             return code;
1056
1057         /* otherwise, we have a locked buffer that we just created */
1058         created = 1;
1059         break;
1060     } /* big while loop */
1061
1062     /* wait for reads */
1063     if (bp->flags & CM_BUF_READING)
1064         buf_WaitIO(scp, bp);
1065
1066     /* once it has been read once, we can unlock it and return it, still
1067      * with its refcount held.
1068      */
1069     lock_ReleaseMutex(&bp->mx);
1070     *bufpp = bp;
1071     osi_Log4(buf_logp, "buf_GetNew returning bp 0x%p for scp 0x%p, offset 0x%x:%08x",
1072               bp, scp, offsetp->HighPart, offsetp->LowPart);
1073     return 0;
1074 }
1075
1076 /* get a page, returning it held but unlocked.  Make sure it is complete */
1077 /* The scp must be unlocked when passed to this function */
1078 long buf_Get(struct cm_scache *scp, osi_hyper_t *offsetp, cm_buf_t **bufpp)
1079 {
1080     cm_buf_t *bp;
1081     long code;
1082     osi_hyper_t pageOffset;
1083     unsigned long tcount;
1084     int created;
1085     long lcount = 0;
1086 #ifdef DISKCACHE95
1087     cm_diskcache_t *dcp;
1088 #endif /* DISKCACHE95 */
1089
1090     created = 0;
1091     pageOffset.HighPart = offsetp->HighPart;
1092     pageOffset.LowPart = offsetp->LowPart & ~(cm_data.buf_blockSize-1);
1093     while (1) {
1094         lcount++;
1095 #ifdef TESTING
1096         buf_ValidateBufQueues();
1097 #endif /* TESTING */
1098
1099         bp = buf_Find(scp, &pageOffset);
1100         if (bp) {
1101             /* lock it and break out */
1102             lock_ObtainMutex(&bp->mx);
1103
1104 #ifdef DISKCACHE95
1105             /* touch disk chunk to update LRU info */
1106             diskcache_Touch(bp->dcp);
1107 #endif /* DISKCACHE95 */
1108             break;
1109         }
1110
1111         /* otherwise, we have to create a page */
1112         code = buf_GetNewLocked(scp, &pageOffset, &bp);
1113         /* bp->mx is now held */
1114
1115         /* check if the buffer was created in a race condition branch.
1116          * If so, go around so we can hold a reference to it.
1117          */
1118         if (code == CM_BUF_EXISTS)
1119             continue;
1120
1121         /* something else went wrong */
1122         if (code != 0) {
1123 #ifdef TESTING
1124             buf_ValidateBufQueues();
1125 #endif /* TESTING */
1126             return code;
1127         }
1128
1129         /* otherwise, we have a locked buffer that we just created */
1130         created = 1;
1131         break;
1132     } /* big while loop */
1133
1134     /* if we get here, we have a locked buffer that may have just been
1135      * created, in which case it needs to be filled with data.
1136      */
1137     if (created) {
1138         /* load the page; freshly created pages should be idle */
1139         osi_assertx(!(bp->flags & (CM_BUF_READING | CM_BUF_WRITING)), "incorrect cm_buf_t flags");
1140
1141         /* start the I/O; may drop lock */
1142         bp->flags |= CM_BUF_READING;
1143         code = (*cm_buf_opsp->Readp)(bp, cm_data.buf_blockSize, &tcount, NULL);
1144
1145 #ifdef DISKCACHE95
1146         code = diskcache_Get(&bp->fid, &bp->offset, bp->datap, cm_data.buf_blockSize, &bp->dataVersion, &tcount, &dcp);
1147         bp->dcp = dcp;    /* pointer to disk cache struct. */
1148 #endif /* DISKCACHE95 */
1149
1150         if (code != 0) {
1151             /* failure or queued */
1152             if (code != ERROR_IO_PENDING) {
1153                 bp->error = code;
1154                 bp->flags |= CM_BUF_ERROR;
1155                 bp->flags &= ~CM_BUF_READING;
1156                 if (bp->flags & CM_BUF_WAITING) {
1157                     osi_Log1(buf_logp, "buf_Get Waking bp 0x%p", bp);
1158                     osi_Wakeup((LONG_PTR) bp);
1159                 }
1160                 lock_ReleaseMutex(&bp->mx);
1161                 buf_Release(bp);
1162 #ifdef TESTING
1163                 buf_ValidateBufQueues();
1164 #endif /* TESTING */
1165                 return code;
1166             }
1167         } else {
1168             /* otherwise, I/O completed instantly and we're done, except
1169              * for padding the xfr out with 0s and checking for EOF
1170              */
1171             if (tcount < (unsigned long) cm_data.buf_blockSize) {
1172                 memset(bp->datap+tcount, 0, cm_data.buf_blockSize - tcount);
1173                 if (tcount == 0)
1174                     bp->flags |= CM_BUF_EOF;
1175             }
1176             bp->flags &= ~CM_BUF_READING;
1177             if (bp->flags & CM_BUF_WAITING) {
1178                 osi_Log1(buf_logp, "buf_Get Waking bp 0x%p", bp);
1179                 osi_Wakeup((LONG_PTR) bp);
1180             }
1181         }
1182
1183     } /* if created */
1184
1185     /* wait for reads, either that which we started above, or that someone
1186      * else started.  We don't care if we return a buffer being cleaned.
1187      */
1188     if (bp->flags & CM_BUF_READING)
1189         buf_WaitIO(scp, bp);
1190
1191     /* once it has been read once, we can unlock it and return it, still
1192      * with its refcount held.
1193      */
1194     lock_ReleaseMutex(&bp->mx);
1195     *bufpp = bp;
1196
1197     /* now remove from queue; will be put in at the head (farthest from
1198      * being recycled) when we're done in buf_Release.
1199      */
1200     lock_ObtainWrite(&buf_globalLock);
1201     if (bp->flags & CM_BUF_INLRU) {
1202         if (cm_data.buf_freeListEndp == bp)
1203             cm_data.buf_freeListEndp = (cm_buf_t *) osi_QPrev(&bp->q);
1204         osi_QRemove((osi_queue_t **) &cm_data.buf_freeListp, &bp->q);
1205         bp->flags &= ~CM_BUF_INLRU;
1206     }
1207     lock_ReleaseWrite(&buf_globalLock);
1208
1209     osi_Log4(buf_logp, "buf_Get returning bp 0x%p for scp 0x%p, offset 0x%x:%08x",
1210               bp, scp, offsetp->HighPart, offsetp->LowPart);
1211 #ifdef TESTING
1212     buf_ValidateBufQueues();
1213 #endif /* TESTING */
1214     return 0;
1215 }
1216
1217 /* count # of elements in the free list;
1218  * we don't bother doing the proper locking for accessing dataVersion or flags
1219  * since it is a pain, and this is really just an advisory call.  If you need
1220  * to do better at some point, rewrite this function.
1221  */
1222 long buf_CountFreeList(void)
1223 {
1224     long count;
1225     cm_buf_t *bufp;
1226
1227     count = 0;
1228     lock_ObtainRead(&buf_globalLock);
1229     for(bufp = cm_data.buf_freeListp; bufp; bufp = (cm_buf_t *) osi_QNext(&bufp->q)) {
1230         /* if the buffer doesn't have an identity, or if the buffer
1231          * has been invalidate (by having its DV stomped upon), then
1232          * count it as free, since it isn't really being utilized.
1233          */
1234         if (!(bufp->flags & CM_BUF_INHASH) || bufp->dataVersion == CM_BUF_VERSION_BAD)
1235             count++;
1236     }
1237     lock_ReleaseRead(&buf_globalLock);
1238     return count;
1239 }
1240
1241 /* clean a buffer synchronously */
1242 afs_uint32 buf_CleanAsync(cm_buf_t *bp, cm_req_t *reqp, afs_uint32 *pisdirty)
1243 {
1244     long code;
1245     osi_assertx(bp->magic == CM_BUF_MAGIC, "invalid cm_buf_t magic");
1246
1247     lock_ObtainMutex(&bp->mx);
1248     code = buf_CleanAsyncLocked(bp, reqp, pisdirty);
1249     lock_ReleaseMutex(&bp->mx);
1250
1251     return code;
1252 }
1253
1254 /* wait for a buffer's cleaning to finish */
1255 void buf_CleanWait(cm_scache_t * scp, cm_buf_t *bp, afs_uint32 locked)
1256 {
1257     osi_assertx(bp->magic == CM_BUF_MAGIC, "invalid cm_buf_t magic");
1258
1259     if (!locked)
1260         lock_ObtainMutex(&bp->mx);
1261     if (bp->flags & CM_BUF_WRITING) {
1262         buf_WaitIO(scp, bp);
1263     }
1264     if (!locked)
1265         lock_ReleaseMutex(&bp->mx);
1266 }
1267
1268 /* set the dirty flag on a buffer, and set associated write-ahead log,
1269  * if there is one.  Allow one to be added to a buffer, but not changed.
1270  *
1271  * The buffer must be locked before calling this routine.
1272  */
1273 void buf_SetDirty(cm_buf_t *bp, afs_uint32 offset, afs_uint32 length, cm_user_t *userp)
1274 {
1275     osi_assertx(bp->magic == CM_BUF_MAGIC, "invalid cm_buf_t magic");
1276     osi_assertx(bp->refCount > 0, "cm_buf_t refcount 0");
1277
1278     if (bp->flags & CM_BUF_DIRTY) {
1279
1280         osi_Log1(buf_logp, "buf_SetDirty 0x%p already dirty", bp);
1281
1282         if (bp->dirty_offset <= offset) {
1283             if (bp->dirty_offset + bp->dirty_length >= offset + length) {
1284                 /* dirty_length remains the same */
1285             } else {
1286                 bp->dirty_length = offset + length - bp->dirty_offset;
1287             }
1288         } else /* bp->dirty_offset > offset */ {
1289             if (bp->dirty_offset + bp->dirty_length >= offset + length) {
1290                 bp->dirty_length = bp->dirty_offset + bp->dirty_length - offset;
1291             } else {
1292                 bp->dirty_length = length;
1293             }
1294             bp->dirty_offset = offset;
1295         }
1296     } else {
1297         osi_Log1(buf_logp, "buf_SetDirty 0x%p", bp);
1298
1299         /* set dirty bit */
1300         bp->flags |= CM_BUF_DIRTY;
1301
1302         /* and turn off EOF flag, since it has associated data now */
1303         bp->flags &= ~CM_BUF_EOF;
1304
1305         bp->dirty_offset = offset;
1306         bp->dirty_length = length;
1307
1308         /* and add to the dirty list.
1309          * we obtain a hold on the buffer for as long as it remains
1310          * in the list.  buffers are only removed from the list by
1311          * the buf_IncrSyncer function regardless of when else the
1312          * dirty flag might be cleared.
1313          *
1314          * This should never happen but just in case there is a bug
1315          * elsewhere, never add to the dirty list if the buffer is
1316          * already there.
1317          */
1318         lock_ObtainWrite(&buf_globalLock);
1319         if (!(bp->flags & CM_BUF_INDL)) {
1320             buf_HoldLocked(bp);
1321             if (!cm_data.buf_dirtyListp) {
1322                 cm_data.buf_dirtyListp = cm_data.buf_dirtyListEndp = bp;
1323             } else {
1324                 cm_data.buf_dirtyListEndp->dirtyp = bp;
1325                 cm_data.buf_dirtyListEndp = bp;
1326             }
1327             bp->dirtyp = NULL;
1328             bp->flags |= CM_BUF_INDL;
1329         }
1330         lock_ReleaseWrite(&buf_globalLock);
1331     }
1332
1333     /* and record the last writer */
1334     if (bp->userp != userp) {
1335         cm_HoldUser(userp);
1336         if (bp->userp)
1337             cm_ReleaseUser(bp->userp);
1338         bp->userp = userp;
1339     }
1340 }
1341
1342 /* clean all buffers, reset log pointers and invalidate all buffers.
1343  * Called with no locks held, and returns with same.
1344  *
1345  * This function is guaranteed to clean and remove the log ptr of all the
1346  * buffers that were dirty or had non-zero log ptrs before the call was
1347  * made.  That's sufficient to clean up any garbage left around by recovery,
1348  * which is all we're counting on this for; there may be newly created buffers
1349  * added while we're running, but that should be OK.
1350  *
1351  * In an environment where there are no transactions (artificially imposed, for
1352  * example, when switching the database to raw mode), this function is used to
1353  * make sure that all updates have been written to the disk.  In that case, we don't
1354  * really require that we forget the log association between pages and logs, but
1355  * it also doesn't hurt.  Since raw mode I/O goes through this buffer package, we don't
1356  * have to worry about invalidating data in the buffers.
1357  *
1358  * This function is used at the end of recovery as paranoia to get the recovered
1359  * database out to disk.  It removes all references to the recovery log and cleans
1360  * all buffers.
1361  */
1362 long buf_CleanAndReset(void)
1363 {
1364     afs_uint32 i;
1365     cm_buf_t *bp;
1366     cm_req_t req;
1367
1368     lock_ObtainRead(&buf_globalLock);
1369     for(i=0; i<cm_data.buf_hashSize; i++) {
1370         for(bp = cm_data.buf_scacheHashTablepp[i]; bp; bp = bp->hashp) {
1371             if ((bp->flags & CM_BUF_DIRTY) == CM_BUF_DIRTY) {
1372                 buf_HoldLocked(bp);
1373                 lock_ReleaseRead(&buf_globalLock);
1374
1375                 /* now no locks are held; clean buffer and go on */
1376                 cm_InitReq(&req);
1377                 req.flags |= CM_REQ_NORETRY;
1378
1379                 buf_CleanAsync(bp, &req, NULL);
1380                 buf_CleanWait(NULL, bp, FALSE);
1381
1382                 /* relock and release buffer */
1383                 lock_ObtainRead(&buf_globalLock);
1384                 buf_ReleaseLocked(bp, FALSE);
1385             } /* dirty */
1386         } /* over one bucket */
1387     }   /* for loop over all hash buckets */
1388
1389     /* release locks */
1390     lock_ReleaseRead(&buf_globalLock);
1391
1392 #ifdef TESTING
1393     buf_ValidateBufQueues();
1394 #endif /* TESTING */
1395
1396     /* and we're done */
1397     return 0;
1398 }
1399
1400 /* called without global lock being held, reserves buffers for callers
1401  * that need more than one held (not locked) at once.
1402  */
1403 void buf_ReserveBuffers(afs_uint64 nbuffers)
1404 {
1405     lock_ObtainWrite(&buf_globalLock);
1406     while (1) {
1407         if (cm_data.buf_reservedBufs + nbuffers > cm_data.buf_maxReservedBufs) {
1408             cm_data.buf_reserveWaiting = 1;
1409             osi_Log1(buf_logp, "buf_ReserveBuffers waiting for %d bufs", nbuffers);
1410             osi_SleepW((LONG_PTR) &cm_data.buf_reservedBufs, &buf_globalLock);
1411             lock_ObtainWrite(&buf_globalLock);
1412         }
1413         else {
1414             cm_data.buf_reservedBufs += nbuffers;
1415             break;
1416         }
1417     }
1418     lock_ReleaseWrite(&buf_globalLock);
1419 }
1420
1421 int buf_TryReserveBuffers(afs_uint64 nbuffers)
1422 {
1423     int code;
1424
1425     lock_ObtainWrite(&buf_globalLock);
1426     if (cm_data.buf_reservedBufs + nbuffers > cm_data.buf_maxReservedBufs) {
1427         code = 0;
1428     }
1429     else {
1430         cm_data.buf_reservedBufs += nbuffers;
1431         code = 1;
1432     }
1433     lock_ReleaseWrite(&buf_globalLock);
1434     return code;
1435 }
1436
1437 /* called without global lock held, releases reservation held by
1438  * buf_ReserveBuffers.
1439  */
1440 void buf_UnreserveBuffers(afs_uint64 nbuffers)
1441 {
1442     lock_ObtainWrite(&buf_globalLock);
1443     cm_data.buf_reservedBufs -= nbuffers;
1444     if (cm_data.buf_reserveWaiting) {
1445         cm_data.buf_reserveWaiting = 0;
1446         osi_Wakeup((LONG_PTR) &cm_data.buf_reservedBufs);
1447     }
1448     lock_ReleaseWrite(&buf_globalLock);
1449 }
1450
1451 /* truncate the buffers past sizep, zeroing out the page, if we don't
1452  * end on a page boundary.
1453  *
1454  * Requires cm_bufCreateLock to be write locked.
1455  */
1456 long buf_Truncate(cm_scache_t *scp, cm_user_t *userp, cm_req_t *reqp,
1457                    osi_hyper_t *sizep)
1458 {
1459     cm_buf_t *bufp;
1460     cm_buf_t *nbufp;                    /* next buffer, if didRelease */
1461     osi_hyper_t bufEnd;
1462     long code;
1463     long bufferPos;
1464     afs_uint32 i;
1465
1466     /* assert that cm_bufCreateLock is held in write mode */
1467     lock_AssertWrite(&scp->bufCreateLock);
1468
1469     i = BUF_FILEHASH(&scp->fid);
1470
1471     lock_ObtainRead(&buf_globalLock);
1472     bufp = cm_data.buf_fileHashTablepp[i];
1473     if (bufp == NULL) {
1474         lock_ReleaseRead(&buf_globalLock);
1475         return 0;
1476     }
1477
1478     buf_HoldLocked(bufp);
1479     lock_ReleaseRead(&buf_globalLock);
1480     while (bufp) {
1481         lock_ObtainMutex(&bufp->mx);
1482
1483         bufEnd.HighPart = 0;
1484         bufEnd.LowPart = cm_data.buf_blockSize;
1485         bufEnd = LargeIntegerAdd(bufEnd, bufp->offset);
1486
1487         if (cm_FidCmp(&bufp->fid, &scp->fid) == 0 &&
1488              LargeIntegerLessThan(*sizep, bufEnd)) {
1489             buf_WaitIO(scp, bufp);
1490         }
1491         lock_ObtainWrite(&scp->rw);
1492
1493         /* make sure we have a callback (so we have the right value for
1494          * the length), and wait for it to be safe to do a truncate.
1495          */
1496         code = cm_SyncOp(scp, bufp, userp, reqp, 0,
1497                           CM_SCACHESYNC_NEEDCALLBACK
1498                           | CM_SCACHESYNC_GETSTATUS
1499                           | CM_SCACHESYNC_SETSIZE
1500                           | CM_SCACHESYNC_BUFLOCKED);
1501
1502
1503         /* if we succeeded in our locking, and this applies to the right
1504          * file, and the truncate request overlaps the buffer either
1505          * totally or partially, then do something.
1506          */
1507         if (code == 0 && cm_FidCmp(&bufp->fid, &scp->fid) == 0
1508              && LargeIntegerLessThan(*sizep, bufEnd)) {
1509
1510
1511             /* destroy the buffer, turning off its dirty bit, if
1512              * we're truncating the whole buffer.  Otherwise, set
1513              * the dirty bit, and clear out the tail of the buffer
1514              * if we just overlap some.
1515              */
1516             if (LargeIntegerLessThanOrEqualTo(*sizep, bufp->offset)) {
1517                 /* truncating the entire page */
1518                 bufp->flags &= ~CM_BUF_DIRTY;
1519                 bufp->dirty_offset = 0;
1520                 bufp->dirty_length = 0;
1521                 bufp->dataVersion = CM_BUF_VERSION_BAD; /* known bad */
1522                 bufp->dirtyCounter++;
1523             }
1524             else {
1525                 /* don't set dirty, since dirty implies
1526                  * currently up-to-date.  Don't need to do this,
1527                  * since we'll update the length anyway.
1528                  *
1529                  * Zero out remainder of the page, in case we
1530                  * seek and write past EOF, and make this data
1531                  * visible again.
1532                  */
1533                 bufferPos = sizep->LowPart & (cm_data.buf_blockSize - 1);
1534                 osi_assertx(bufferPos != 0, "non-zero bufferPos");
1535                 memset(bufp->datap + bufferPos, 0,
1536                         cm_data.buf_blockSize - bufferPos);
1537             }
1538         }
1539
1540         cm_SyncOpDone( scp, bufp,
1541                        CM_SCACHESYNC_NEEDCALLBACK | CM_SCACHESYNC_GETSTATUS
1542                        | CM_SCACHESYNC_SETSIZE | CM_SCACHESYNC_BUFLOCKED);
1543
1544         lock_ReleaseWrite(&scp->rw);
1545         lock_ReleaseMutex(&bufp->mx);
1546
1547         if (!code) {
1548             nbufp = bufp->fileHashp;
1549             if (nbufp)
1550                 buf_Hold(nbufp);
1551         } else {
1552             /* This forces the loop to end and the error code
1553              * to be returned. */
1554             nbufp = NULL;
1555         }
1556         buf_Release(bufp);
1557         bufp = nbufp;
1558     }
1559
1560 #ifdef TESTING
1561     buf_ValidateBufQueues();
1562 #endif /* TESTING */
1563
1564     /* done */
1565     return code;
1566 }
1567
1568 long buf_FlushCleanPages(cm_scache_t *scp, cm_user_t *userp, cm_req_t *reqp)
1569 {
1570     long code;
1571     cm_buf_t *bp;               /* buffer we're hacking on */
1572     cm_buf_t *nbp;
1573     int didRelease;
1574     afs_uint32 i;
1575
1576     i = BUF_FILEHASH(&scp->fid);
1577
1578     code = 0;
1579     lock_ObtainRead(&buf_globalLock);
1580     bp = cm_data.buf_fileHashTablepp[i];
1581     if (bp)
1582         buf_HoldLocked(bp);
1583     lock_ReleaseRead(&buf_globalLock);
1584
1585     for (; bp; bp = nbp) {
1586         didRelease = 0; /* haven't released this buffer yet */
1587
1588         /* clean buffer synchronously */
1589         if (cm_FidCmp(&bp->fid, &scp->fid) == 0) {
1590             lock_ObtainMutex(&bp->mx);
1591
1592             /* start cleaning the buffer, and wait for it to finish */
1593             buf_CleanAsyncLocked(bp, reqp, NULL);
1594             buf_WaitIO(scp, bp);
1595             lock_ReleaseMutex(&bp->mx);
1596
1597             /*
1598              * if the error for the previous buffer was BADFD
1599              * then all buffers for the FID are bad.  Do not
1600              * attempt to stabalize.
1601              */
1602             if (code != CM_ERROR_BADFD) {
1603                 code = (*cm_buf_opsp->Stabilizep)(scp, userp, reqp);
1604                 if (code && code != CM_ERROR_BADFD)
1605                     goto skip;
1606             }
1607             if (code == CM_ERROR_BADFD) {
1608                 /* if the scp's FID is bad its because we received VNOVNODE
1609                  * when attempting to FetchStatus before the write.  This
1610                  * page therefore contains data that can no longer be stored.
1611                  */
1612                 lock_ObtainMutex(&bp->mx);
1613                 bp->flags &= ~CM_BUF_DIRTY;
1614                 bp->flags |= CM_BUF_ERROR;
1615                 bp->error = CM_ERROR_BADFD;
1616                 bp->dirty_offset = 0;
1617                 bp->dirty_length = 0;
1618                 bp->dataVersion = CM_BUF_VERSION_BAD;   /* known bad */
1619                 bp->dirtyCounter++;
1620                 lock_ReleaseMutex(&bp->mx);
1621             }
1622
1623             /* actually, we only know that buffer is clean if ref
1624              * count is 1, since we don't have buffer itself locked.
1625              */
1626             if (!(bp->flags & CM_BUF_DIRTY)) {
1627                 lock_ObtainWrite(&buf_globalLock);
1628                 if (bp->refCount == 1) {        /* bp is held above */
1629                     nbp = bp->fileHashp;
1630                     if (nbp)
1631                         buf_HoldLocked(nbp);
1632                     buf_ReleaseLocked(bp, TRUE);
1633                     didRelease = 1;
1634                     buf_Recycle(bp);
1635                 }
1636                 lock_ReleaseWrite(&buf_globalLock);
1637             }
1638
1639             if (code == 0)
1640                 (*cm_buf_opsp->Unstabilizep)(scp, userp);
1641         }
1642
1643       skip:
1644         if (!didRelease) {
1645             lock_ObtainRead(&buf_globalLock);
1646             nbp = bp->fileHashp;
1647             if (nbp)
1648                 buf_HoldLocked(nbp);
1649             buf_ReleaseLocked(bp, FALSE);
1650             lock_ReleaseRead(&buf_globalLock);
1651         }
1652     }   /* for loop over a bunch of buffers */
1653
1654 #ifdef TESTING
1655     buf_ValidateBufQueues();
1656 #endif /* TESTING */
1657
1658     /* done */
1659     return code;
1660 }
1661
1662 /* Must be called with scp->rw held */
1663 long buf_ForceDataVersion(cm_scache_t * scp, afs_uint64 fromVersion, afs_uint64 toVersion)
1664 {
1665     cm_buf_t * bp;
1666     afs_uint32 i;
1667     int found = 0;
1668
1669     lock_AssertAny(&scp->rw);
1670
1671     i = BUF_FILEHASH(&scp->fid);
1672
1673     lock_ObtainRead(&buf_globalLock);
1674
1675     for (bp = cm_data.buf_fileHashTablepp[i]; bp; bp = bp->fileHashp) {
1676         if (cm_FidCmp(&bp->fid, &scp->fid) == 0) {
1677             if (bp->dataVersion == fromVersion) {
1678                 bp->dataVersion = toVersion;
1679                 found = 1;
1680             }
1681         }
1682     }
1683     lock_ReleaseRead(&buf_globalLock);
1684
1685     if (found)
1686         return 0;
1687     else
1688         return ENOENT;
1689 }
1690
1691 long buf_CleanVnode(struct cm_scache *scp, cm_user_t *userp, cm_req_t *reqp)
1692 {
1693     long code = 0;
1694     long wasDirty = 0;
1695     cm_buf_t *bp;               /* buffer we're hacking on */
1696     cm_buf_t *nbp;              /* next one */
1697     afs_uint32 i;
1698
1699     i = BUF_FILEHASH(&scp->fid);
1700
1701     lock_ObtainRead(&buf_globalLock);
1702     bp = cm_data.buf_fileHashTablepp[i];
1703     if (bp)
1704         buf_HoldLocked(bp);
1705     lock_ReleaseRead(&buf_globalLock);
1706     for (; bp; bp = nbp) {
1707         /* clean buffer synchronously */
1708         if (cm_FidCmp(&bp->fid, &scp->fid) == 0) {
1709             lock_ObtainMutex(&bp->mx);
1710             if (bp->flags & CM_BUF_DIRTY) {
1711                 if (userp && userp != bp->userp) {
1712                     cm_HoldUser(userp);
1713                     if (bp->userp)
1714                         cm_ReleaseUser(bp->userp);
1715                     bp->userp = userp;
1716                 }
1717
1718                 switch (code) {
1719                 case CM_ERROR_NOSUCHFILE:
1720                 case CM_ERROR_BADFD:
1721                 case CM_ERROR_NOACCESS:
1722                 case CM_ERROR_QUOTA:
1723                 case CM_ERROR_SPACE:
1724                 case CM_ERROR_TOOBIG:
1725                 case CM_ERROR_READONLY:
1726                 case CM_ERROR_NOSUCHPATH:
1727                     /*
1728                      * Apply the previous fatal error to this buffer.
1729                      * Do not waste the time attempting to store to
1730                      * the file server when we know it will fail.
1731                      */
1732                     bp->flags &= ~CM_BUF_DIRTY;
1733                     bp->flags |= CM_BUF_ERROR;
1734                     bp->dirty_offset = 0;
1735                     bp->dirty_length = 0;
1736                     bp->error = code;
1737                     bp->dataVersion = CM_BUF_VERSION_BAD;
1738                     bp->dirtyCounter++;
1739                     break;
1740                 case CM_ERROR_TIMEDOUT:
1741                 case CM_ERROR_ALLDOWN:
1742                 case CM_ERROR_ALLBUSY:
1743                 case CM_ERROR_ALLOFFLINE:
1744                 case CM_ERROR_CLOCKSKEW:
1745                     /* do not mark the buffer in error state but do
1746                      * not attempt to complete the rest either.
1747                      */
1748                     break;
1749                 default:
1750                     code = buf_CleanAsyncLocked(bp, reqp, &wasDirty);
1751                     if (bp->flags & CM_BUF_ERROR) {
1752                         code = bp->error;
1753                         if (code == 0)
1754                             code = -1;
1755                     }
1756                 }
1757                 buf_CleanWait(scp, bp, TRUE);
1758             }
1759             lock_ReleaseMutex(&bp->mx);
1760         }
1761
1762         lock_ObtainRead(&buf_globalLock);
1763         nbp = bp->fileHashp;
1764         if (nbp)
1765             buf_HoldLocked(nbp);
1766         buf_ReleaseLocked(bp, FALSE);
1767         lock_ReleaseRead(&buf_globalLock);
1768     }   /* for loop over a bunch of buffers */
1769
1770 #ifdef TESTING
1771     buf_ValidateBufQueues();
1772 #endif /* TESTING */
1773
1774     /* done */
1775     return code;
1776 }
1777
#ifdef TESTING
/*
 * Sanity-check the global buffer lists (TESTING builds only).
 *
 * With buf_globalLock held for read, the free list is walked backward
 * from buf_freeListEndp and forward from buf_freeListp, and the list of
 * all buffers is walked from buf_allp.  Every buffer visited must carry
 * CM_BUF_MAGIC.  After the lock is released, the two free-list counts
 * must agree and the all-list count must equal cm_data.buf_nbuffers.
 *
 * Any violation ends in osi_panic() with a message naming the check
 * that failed.
 */
void
buf_ValidateBufQueues(void)
{
    cm_buf_t *bp;
    afs_uint32 countf = 0;      /* free list, walked forward */
    afs_uint32 countb = 0;      /* free list, walked backward */
    afs_uint32 counta = 0;      /* list of all buffers */

    lock_ObtainRead(&buf_globalLock);
    for (bp = cm_data.buf_freeListEndp; bp; bp = (cm_buf_t *) osi_QPrev(&bp->q)) {
        if (bp->magic != CM_BUF_MAGIC)
            osi_panic("buf magic error", __FILE__, __LINE__);
        countb++;
    }

    for (bp = cm_data.buf_freeListp; bp; bp = (cm_buf_t *) osi_QNext(&bp->q)) {
        if (bp->magic != CM_BUF_MAGIC)
            osi_panic("buf magic error", __FILE__, __LINE__);
        countf++;
    }

    for (bp = cm_data.buf_allp; bp; bp = bp->allp) {
        if (bp->magic != CM_BUF_MAGIC)
            osi_panic("buf magic error", __FILE__, __LINE__);
        counta++;
    }
    lock_ReleaseRead(&buf_globalLock);

    /* the free list must have the same length in both directions */
    if (countb != countf)
        osi_panic("buf free list forward/backward count mismatch", __FILE__, __LINE__);

    /* every buffer must be reachable from buf_allp */
    if (counta != cm_data.buf_nbuffers)
        osi_panic("buf all list count does not match buf_nbuffers", __FILE__, __LINE__);
}
#endif /* TESTING */
1815
1816 /* dump the contents of the buf_scacheHashTablepp. */
1817 int cm_DumpBufHashTable(FILE *outputFile, char *cookie, int lock)
1818 {
1819     int zilch;
1820     cm_buf_t *bp;
1821     char output[1024];
1822     afs_uint32 i;
1823
1824     if (cm_data.buf_scacheHashTablepp == NULL)
1825         return -1;
1826
1827     if (lock)
1828         lock_ObtainRead(&buf_globalLock);
1829
1830     StringCbPrintfA(output, sizeof(output), "%s - dumping buf_HashTable - buf_hashSize=%d\r\n",
1831                     cookie, cm_data.buf_hashSize);
1832     WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1833
1834     for (i = 0; i < cm_data.buf_hashSize; i++)
1835     {
1836         for (bp = cm_data.buf_scacheHashTablepp[i]; bp; bp=bp->hashp)
1837         {
1838             StringCbPrintfA(output, sizeof(output),
1839                             "%s bp=0x%08X, hash=%d, fid (cell=%d, volume=%d, "
1840                             "vnode=%d, unique=%d), offset=%x:%08x, dv=%I64d, "
1841                             "flags=0x%x, cmFlags=0x%x, error=0x%x, refCount=%d\r\n",
1842                              cookie, (void *)bp, i, bp->fid.cell, bp->fid.volume,
1843                              bp->fid.vnode, bp->fid.unique, bp->offset.HighPart,
1844                              bp->offset.LowPart, bp->dataVersion, bp->flags,
1845                              bp->cmFlags, bp->error, bp->refCount);
1846             WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1847         }
1848     }
1849
1850     StringCbPrintfA(output, sizeof(output), "%s - Done dumping buf_HashTable.\r\n", cookie);
1851     WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1852
1853     StringCbPrintfA(output, sizeof(output), "%s - dumping buf_freeListEndp\r\n", cookie);
1854     WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1855     for(bp = cm_data.buf_freeListEndp; bp; bp=(cm_buf_t *) osi_QPrev(&bp->q)) {
1856         StringCbPrintfA(output, sizeof(output),
1857                          "%s bp=0x%08X, fid (cell=%d, volume=%d, "
1858                          "vnode=%d, unique=%d), offset=%x:%08x, dv=%I64d, "
1859                          "flags=0x%x, cmFlags=0x%x, error=0x%x, refCount=%d\r\n",
1860                          cookie, (void *)bp, bp->fid.cell, bp->fid.volume,
1861                          bp->fid.vnode, bp->fid.unique, bp->offset.HighPart,
1862                          bp->offset.LowPart, bp->dataVersion, bp->flags,
1863                          bp->cmFlags, bp->error, bp->refCount);
1864         WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1865     }
1866     StringCbPrintfA(output, sizeof(output), "%s - Done dumping buf_FreeListEndp.\r\n", cookie);
1867     WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1868
1869     StringCbPrintfA(output, sizeof(output), "%s - dumping buf_dirtyListp\r\n", cookie);
1870     WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1871     for(bp = cm_data.buf_dirtyListp; bp; bp=bp->dirtyp) {
1872         StringCbPrintfA(output, sizeof(output),
1873                          "%s bp=0x%08X, fid (cell=%d, volume=%d, "
1874                          "vnode=%d, unique=%d), offset=%x:%08x, dv=%I64d, "
1875                          "flags=0x%x, cmFlags=0x%x, error=0x%x, refCount=%d\r\n",
1876                          cookie, (void *)bp, bp->fid.cell, bp->fid.volume,
1877                          bp->fid.vnode, bp->fid.unique, bp->offset.HighPart,
1878                          bp->offset.LowPart, bp->dataVersion, bp->flags,
1879                          bp->cmFlags, bp->error, bp->refCount);
1880         WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1881     }
1882     StringCbPrintfA(output, sizeof(output), "%s - Done dumping buf_dirtyListp.\r\n", cookie);
1883     WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1884
1885     if (lock)
1886         lock_ReleaseRead(&buf_globalLock);
1887     return 0;
1888 }
1889
1890 void buf_ForceTrace(BOOL flush)
1891 {
1892     HANDLE handle;
1893     int len;
1894     char buf[256];
1895
1896     if (!buf_logp)
1897         return;
1898
1899     len = GetTempPath(sizeof(buf)-10, buf);
1900     StringCbCopyA(&buf[len], sizeof(buf)-len, "/afs-buffer.log");
1901     handle = CreateFile(buf, GENERIC_WRITE, FILE_SHARE_READ,
1902                             NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
1903     if (handle == INVALID_HANDLE_VALUE) {
1904         osi_panic("Cannot create log file", __FILE__, __LINE__);
1905     }
1906     osi_LogPrint(buf_logp, handle);
1907     if (flush)
1908         FlushFileBuffers(handle);
1909     CloseHandle(handle);
1910 }
1911
1912 long buf_DirtyBuffersExist(cm_fid_t *fidp)
1913 {
1914     cm_buf_t *bp;
1915     afs_uint32 bcount = 0;
1916     afs_uint32 i;
1917
1918     i = BUF_FILEHASH(fidp);
1919
1920     for (bp = cm_data.buf_fileHashTablepp[i]; bp; bp=bp->allp, bcount++) {
1921         if (!cm_FidCmp(fidp, &bp->fid) && (bp->flags & CM_BUF_DIRTY))
1922             return 1;
1923     }
1924     return 0;
1925 }
1926
#if 0
/*
 * (Disabled code.)  Discard the dirty state of every buffer belonging to
 * scp: walk the list of all buffers and, for each dirty buffer with a
 * matching FID, clear CM_BUF_CMSTORING and CM_BUF_DIRTY, reset the dirty
 * range, mark the buffer in error with VNOVNODE and a bad data version,
 * and wake anyone flagged as waiting on it.
 *
 * Always returns 0.
 */
long buf_CleanDirtyBuffers(cm_scache_t *scp)
{
    cm_buf_t *bp;
    cm_fid_t * fidp = &scp->fid;

    for (bp = cm_data.buf_allp; bp; bp = bp->allp) {
        if (!cm_FidCmp(fidp, &bp->fid) && (bp->flags & CM_BUF_DIRTY)) {
            buf_Hold(bp);
            lock_ObtainMutex(&bp->mx);
            bp->cmFlags &= ~CM_BUF_CMSTORING;
            bp->flags &= ~CM_BUF_DIRTY;
            bp->dirty_offset = 0;
            bp->dirty_length = 0;
            bp->flags |= CM_BUF_ERROR;
            bp->error = VNOVNODE;
            bp->dataVersion = CM_BUF_VERSION_BAD; /* bad */
            bp->dirtyCounter++;
            if (bp->flags & CM_BUF_WAITING) {
                osi_Log2(buf_logp, "BUF CleanDirtyBuffers Waking [scp 0x%x] bp 0x%x", scp, bp);
                /*
                 * Wake on the buffer itself.  The address of the local
                 * variable "bp" is private to this call, so no other
                 * thread can be sleeping on it.
                 * NOTE(review): assumes waiters sleep on the buffer
                 * address -- confirm against the sleep site before
                 * enabling this code.
                 */
                osi_Wakeup((LONG_PTR) bp);
            }
            lock_ReleaseMutex(&bp->mx);
            buf_Release(bp);
        }
    }
    return 0;
}
#endif