2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
9 * Portions Copyright (c) 2005-2008 Sine Nomine Associates
15 Institution: The Information Technology Center, Carnegie-Mellon University
18 #include <afsconfig.h>
19 #include <afs/param.h>
20 #define MAXINT (~(1<<((sizeof(int)*8)-1)))
27 #ifdef AFS_PTHREAD_ENV
29 #else /* AFS_PTHREAD_ENV */
30 #include <afs/assert.h>
31 #endif /* AFS_PTHREAD_ENV */
34 #include "rx/rx_queue.h"
35 #include <afs/afsint.h>
37 #include <afs/errors.h>
40 #include <afs/afssyscalls.h>
44 #include "volume_inline.h"
45 #include "vnode_inline.h"
46 #include "partition.h"
49 #if defined(AFS_SGI_ENV)
50 #include "sys/types.h"
62 #include <sys/fcntl.h>
65 #endif /* AFS_NT40_ENV */
72 struct VnodeClassInfo VnodeClassInfo[nVNODECLASSES];
74 void VNLog(afs_int32 aop, afs_int32 anparms, ... );
81 #define BAD_IGET -1000
83 /* There are two separate vnode queue types defined here:
84 * Each hash conflict chain -- is singly linked, with a single head
85 * pointer. New entries are added at the beginning. Old
86 * entries are removed by linear search, which generally
87 * only occurs after a disk read.
88 * LRU chain -- is doubly linked, single head pointer.
89 * Entries are added at the head, reclaimed from the tail,
90 * or removed from anywhere in the queue.
94 /* Vnode hash table. Find hash chain by taking lower bits of
95 * (volume_hash_offset + vnode).
96 * This distributes the root inodes of the volumes over the
97 * hash table entries and also distributes the vnodes of
98 * volumes reasonably fairly. The volume_hash_offset field
99 * for each volume is established as the volume comes on line
100 * by using the VOLUME_HASH_OFFSET macro. This distributes the
101 * volumes fairly among the cache entries, both when servicing
102 * a small number of volumes and when servicing a large number.
105 /* logging stuff for finding bugs */
106 #define THELOGSIZE 5120
107 static afs_int32 theLog[THELOGSIZE];
108 static afs_int32 vnLogPtr = 0;
110 VNLog(afs_int32 aop, afs_int32 anparms, ... )
115 va_start(ap, anparms);
118 anparms = 4; /* do bounds checking */
120 temp = (aop << 16) | anparms;
121 theLog[vnLogPtr++] = temp;
122 if (vnLogPtr >= THELOGSIZE)
124 for (temp = 0; temp < anparms; temp++) {
125 theLog[vnLogPtr++] = va_arg(ap, afs_int32);
126 if (vnLogPtr >= THELOGSIZE)
132 /* VolumeHashOffset -- returns a new value to be stored in the
133 * volumeHashOffset of a Volume structure. Called when a
134 * volume is initialized. Sets the volumeHashOffset so that
135 * vnode cache entries are distributed reasonably between
136 * volumes (the root vnodes of the volumes will hash to
137 * different values, and spacing is maintained between volumes
138 * when there are not many volumes represented), and spread
139 * equally amongst vnodes within a single volume.
142 VolumeHashOffset_r(void)
144 static int nextVolumeHashOffset = 0;
145 /* hashindex Must be power of two in size */
147 # define hashMask ((1<<hashShift)-1)
148 static byte hashindex[1 << hashShift] =
149 { 0, 128, 64, 192, 32, 160, 96, 224 };
151 offset = hashindex[nextVolumeHashOffset & hashMask]
152 + (nextVolumeHashOffset >> hashShift);
153 nextVolumeHashOffset++;
157 /* Change hashindex (above) if you change this constant */
158 #define VNODE_HASH_TABLE_SIZE 256
159 private Vnode *VnodeHashTable[VNODE_HASH_TABLE_SIZE];
160 #define VNODE_HASH(volumeptr,vnodenumber)\
161 ((volumeptr->vnodeHashOffset + vnodenumber)&(VNODE_HASH_TABLE_SIZE-1))
165 * add a vnode to the volume's vnode list.
167 * @param[in] vp volume object pointer
168 * @param[in] vnp vnode object pointer
170 * @note for DAFS, it may seem like we should be acquiring a lightweight ref
171 * on vp, but this would actually break things. Right now, this is ok
172 * because we destroy all vnode cache contents during volume
177 * @internal volume package internal use only
180 AddToVVnList(Volume * vp, Vnode * vnp)
182 if (queue_IsOnQueue(vnp))
186 Vn_cacheCheck(vnp) = vp->cacheCheck;
187 queue_Append(&vp->vnode_list, vnp);
188 Vn_stateFlags(vnp) |= VN_ON_VVN;
192 * delete a vnode from the volume's vnode list.
196 * @internal volume package internal use only
199 DeleteFromVVnList(Vnode * vnp)
201 Vn_volume(vnp) = NULL;
203 if (!queue_IsOnQueue(vnp))
207 Vn_stateFlags(vnp) &= ~(VN_ON_VVN);
211 * add a vnode to the end of the lru.
213 * @param[in] vcp vnode class info object pointer
214 * @param[in] vnp vnode object pointer
216 * @internal vnode package internal use only
219 AddToVnLRU(struct VnodeClassInfo * vcp, Vnode * vnp)
221 if (Vn_stateFlags(vnp) & VN_ON_LRU) {
225 /* Add it to the circular LRU list */
226 if (vcp->lruHead == NULL)
227 Abort("VPutVnode: vcp->lruHead==NULL");
229 vnp->lruNext = vcp->lruHead;
230 vnp->lruPrev = vcp->lruHead->lruPrev;
231 vcp->lruHead->lruPrev = vnp;
232 vnp->lruPrev->lruNext = vnp;
236 /* If the vnode was just deleted, put it at the end of the chain so it
237 * will be reused immediately */
239 vcp->lruHead = vnp->lruNext;
241 Vn_stateFlags(vnp) |= VN_ON_LRU;
245 * delete a vnode from the lru.
247 * @param[in] vcp vnode class info object pointer
248 * @param[in] vnp vnode object pointer
250 * @internal vnode package internal use only
253 DeleteFromVnLRU(struct VnodeClassInfo * vcp, Vnode * vnp)
255 if (!(Vn_stateFlags(vnp) & VN_ON_LRU)) {
259 if (vnp == vcp->lruHead)
260 vcp->lruHead = vcp->lruHead->lruNext;
262 if ((vnp == vcp->lruHead) ||
263 (vcp->lruHead == NULL))
264 Abort("DeleteFromVnLRU: lru chain addled!\n");
266 vnp->lruPrev->lruNext = vnp->lruNext;
267 vnp->lruNext->lruPrev = vnp->lruPrev;
269 Vn_stateFlags(vnp) &= ~(VN_ON_LRU);
273 * add a vnode to the vnode hash table.
275 * @param[in] vnp vnode object pointer
279 * @post vnode on hash
281 * @internal vnode package internal use only
284 AddToVnHash(Vnode * vnp)
286 unsigned int newHash;
288 if (!(Vn_stateFlags(vnp) & VN_ON_HASH)) {
289 newHash = VNODE_HASH(Vn_volume(vnp), Vn_id(vnp));
290 vnp->hashNext = VnodeHashTable[newHash];
291 VnodeHashTable[newHash] = vnp;
292 vnp->hashIndex = newHash;
294 Vn_stateFlags(vnp) |= VN_ON_HASH;
299 * delete a vnode from the vnode hash table.
306 * @post vnode removed from hash
308 * @internal vnode package internal use only
311 DeleteFromVnHash(Vnode * vnp)
315 if (Vn_stateFlags(vnp) & VN_ON_HASH) {
316 tvnp = VnodeHashTable[vnp->hashIndex];
318 VnodeHashTable[vnp->hashIndex] = vnp->hashNext;
320 while (tvnp && tvnp->hashNext != vnp)
321 tvnp = tvnp->hashNext;
323 tvnp->hashNext = vnp->hashNext;
326 vnp->hashNext = NULL;
328 Vn_stateFlags(vnp) &= ~(VN_ON_HASH);
334 * invalidate a vnode cache entry.
336 * @param[in] avnode vnode object pointer
340 * @post vnode metadata invalidated.
341 * vnode removed from hash table.
342 * DAFS: vnode state set to VN_STATE_INVALID.
344 * @internal vnode package internal use only
347 VInvalidateVnode_r(struct Vnode *avnode)
349 avnode->changed_newTime = 0; /* don't let it get flushed out again */
350 avnode->changed_oldTime = 0;
351 avnode->delete = 0; /* it isn't deleted, really */
352 avnode->cacheCheck = 0; /* invalid: prevents future vnode searches from working */
353 DeleteFromVnHash(avnode);
354 #ifdef AFS_DEMAND_ATTACH_FS
355 VnChangeState_r(avnode, VN_STATE_INVALID);
361 * initialize vnode cache for a given vnode class.
363 * @param[in] class vnode class
364 * @param[in] nVnodes size of cache
366 * @post vnode cache allocated and initialized
368 * @internal volume package internal use only
370 * @note generally called by VInitVolumePackage_r
372 * @see VInitVolumePackage_r
375 VInitVnodes(VnodeClass class, int nVnodes)
378 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
380 vcp->allocs = vcp->gets = vcp->reads = vcp->writes = 0;
381 vcp->cacheSize = nVnodes;
384 assert(CHECKSIZE_SMALLVNODE);
386 vcp->residentSize = SIZEOF_SMALLVNODE;
387 vcp->diskSize = SIZEOF_SMALLDISKVNODE;
388 vcp->magic = SMALLVNODEMAGIC;
392 vcp->residentSize = SIZEOF_LARGEVNODE;
393 vcp->diskSize = SIZEOF_LARGEDISKVNODE;
394 vcp->magic = LARGEVNODEMAGIC;
398 int s = vcp->diskSize - 1;
408 va = (byte *) calloc(nVnodes, vcp->residentSize);
411 Vnode *vnp = (Vnode *) va;
412 Vn_refcount(vnp) = 0; /* no context switches */
413 Vn_stateFlags(vnp) |= VN_ON_LRU;
414 #ifdef AFS_DEMAND_ATTACH_FS
415 assert(pthread_cond_init(&Vn_stateCV(vnp), NULL) == 0);
416 Vn_state(vnp) = VN_STATE_INVALID;
418 #else /* !AFS_DEMAND_ATTACH_FS */
419 Lock_Init(&vnp->lock);
420 #endif /* !AFS_DEMAND_ATTACH_FS */
421 vnp->changed_oldTime = 0;
422 vnp->changed_newTime = 0;
423 Vn_volume(vnp) = NULL;
424 Vn_cacheCheck(vnp) = 0;
425 vnp->delete = Vn_id(vnp) = 0;
426 #ifdef AFS_PTHREAD_ENV
427 vnp->writer = (pthread_t) 0;
428 #else /* AFS_PTHREAD_ENV */
429 vnp->writer = (PROCESS) 0;
430 #endif /* AFS_PTHREAD_ENV */
434 if (vcp->lruHead == NULL)
435 vcp->lruHead = vnp->lruNext = vnp->lruPrev = vnp;
437 vnp->lruNext = vcp->lruHead;
438 vnp->lruPrev = vcp->lruHead->lruPrev;
439 vcp->lruHead->lruPrev = vnp;
440 vnp->lruPrev->lruNext = vnp;
443 va += vcp->residentSize;
450 * allocate an unused vnode from the lru chain.
452 * @param[in] vcp vnode class info object pointer
454 * @pre VOL_LOCK is held
456 * @post vnode object is removed from lru, and vnode hash table.
457 * vnode is disassociated from volume object.
458 * state is set to VN_STATE_INVALID.
459 * inode handle is released.
461 * @note we traverse backwards along the lru circlist. It shouldn't
462 * be necessary to specify that nUsers == 0 since if it is in the list,
463 * nUsers should be 0. Things shouldn't be in lruq unless no one is
466 * @warning DAFS: VOL_LOCK is dropped while doing inode handle release
468 * @return vnode object pointer
471 VGetFreeVnode_r(struct VnodeClassInfo * vcp)
475 vnp = vcp->lruHead->lruPrev;
476 #ifdef AFS_DEMAND_ATTACH_FS
477 if (Vn_refcount(vnp) != 0 || VnIsExclusiveState(Vn_state(vnp)) ||
478 Vn_readers(vnp) != 0)
479 Abort("VGetFreeVnode_r: in-use vnode in lruq");
481 if (Vn_refcount(vnp) != 0 || CheckLock(&vnp->lock))
482 Abort("VGetFreeVnode_r: locked vnode in lruq");
484 VNLog(1, 2, Vn_id(vnp), (intptr_t)vnp, 0, 0);
487 * it's going to be overwritten soon enough.
488 * remove from LRU, delete hash entry, and
489 * disassociate from old parent volume before
490 * we have a chance to drop the vol glock
492 DeleteFromVnLRU(vcp, vnp);
493 DeleteFromVnHash(vnp);
494 if (Vn_volume(vnp)) {
495 DeleteFromVVnList(vnp);
498 /* drop the file descriptor */
500 #ifdef AFS_DEMAND_ATTACH_FS
501 VnChangeState_r(vnp, VN_STATE_RELEASING);
504 /* release is, potentially, a highly latent operation due to a couple
506 * - ihandle package lock contention
507 * - closing file descriptor(s) associated with ih
509 * Hence, we perform it outside of the volume package lock in order to
510 * reduce the probability of contention.
512 IH_RELEASE(vnp->handle);
513 #ifdef AFS_DEMAND_ATTACH_FS
518 #ifdef AFS_DEMAND_ATTACH_FS
519 VnChangeState_r(vnp, VN_STATE_INVALID);
527 * lookup a vnode in the vnode cache hash table.
529 * @param[in] vp pointer to volume object
530 * @param[in] vnodeId vnode id
534 * @post matching vnode object or NULL is returned
536 * @return vnode object pointer
537 * @retval NULL no matching vnode object was found in the cache
539 * @internal vnode package internal use only
541 * @note this symbol is exported strictly for fssync debug protocol use
544 VLookupVnode(Volume * vp, VnodeId vnodeId)
547 unsigned int newHash;
549 newHash = VNODE_HASH(vp, vnodeId);
550 for (vnp = VnodeHashTable[newHash];
552 ((Vn_id(vnp) != vnodeId) ||
553 (Vn_volume(vnp) != vp) ||
554 (vp->cacheCheck != Vn_cacheCheck(vnp))));
555 vnp = vnp->hashNext);
562 VAllocVnode(Error * ec, Volume * vp, VnodeType type)
566 retVal = VAllocVnode_r(ec, vp, type);
572 * allocate a new vnode.
574 * @param[out] ec error code return
575 * @param[in] vp volume object pointer
576 * @param[in] type desired vnode type
578 * @return vnode object pointer
580 * @pre VOL_LOCK held;
581 * heavyweight ref held on vp
583 * @post vnode allocated and returned
586 VAllocVnode_r(Error * ec, Volume * vp, VnodeType type)
591 struct VnodeClassInfo *vcp;
594 #ifdef AFS_DEMAND_ATTACH_FS
595 VolState vol_state_save;
600 #ifdef AFS_DEMAND_ATTACH_FS
602 * once a volume has entered an error state, don't permit
603 * further operations to proceed
604 * -- tkeiser 11/21/2007
606 VWaitExclusiveState_r(vp);
607 if (VIsErrorState(V_attachState(vp))) {
608 /* XXX is VSALVAGING acceptable here? */
614 if (programType == fileServer && !V_inUse(vp)) {
615 if (vp->specialStatus) {
616 *ec = vp->specialStatus;
622 class = vnodeTypeToClass(type);
623 vcp = &VnodeClassInfo[class];
625 if (!VolumeWriteable(vp)) {
626 *ec = (bit32) VREADONLY;
630 unique = vp->nextVnodeUnique++;
632 unique = vp->nextVnodeUnique++;
634 if (vp->nextVnodeUnique > V_uniquifier(vp)) {
635 VUpdateVolume_r(ec, vp, 0);
640 if (programType == fileServer) {
641 VAddToVolumeUpdateList_r(ec, vp);
646 /* Find a slot in the bit map */
647 bitNumber = VAllocBitmapEntry_r(ec, vp, &vp->vnodeIndex[class],
648 VOL_ALLOC_BITMAP_WAIT);
651 vnodeNumber = bitNumberToVnodeNumber(bitNumber, class);
655 * at this point we should be assured that V_attachState(vp) is non-exclusive
659 VNLog(2, 1, vnodeNumber, 0, 0, 0);
660 /* Prepare to move it to the new hash chain */
661 vnp = VLookupVnode(vp, vnodeNumber);
663 /* slot already exists. May even not be in lruq (consider store file locking a file being deleted)
664 * so we may have to wait for it below */
665 VNLog(3, 2, vnodeNumber, (intptr_t)vnp, 0, 0);
667 VnCreateReservation_r(vnp);
668 if (Vn_refcount(vnp) == 1) {
669 /* we're the only user */
670 /* This won't block */
671 VnLock(vnp, WRITE_LOCK, VOL_LOCK_HELD, WILL_NOT_DEADLOCK);
673 /* other users present; follow locking hierarchy */
674 VnLock(vnp, WRITE_LOCK, VOL_LOCK_HELD, MIGHT_DEADLOCK);
676 #ifdef AFS_DEMAND_ATTACH_FS
679 * vnode was cached, wait for any existing exclusive ops to finish.
680 * once we have reacquired the lock, re-verify volume state.
682 * note: any vnode error state is related to the old vnode; disregard.
684 VnWaitQuiescent_r(vnp);
685 if (VIsErrorState(V_attachState(vp))) {
686 VnUnlock(vnp, WRITE_LOCK);
687 VnCancelReservation_r(vnp);
694 * verify state of the world hasn't changed
696 * (technically, this should never happen because cachecheck
697 * is only updated during a volume attach, which should not
698 * happen when refs are held)
700 if (Vn_volume(vnp)->cacheCheck != Vn_cacheCheck(vnp)) {
701 VnUnlock(vnp, WRITE_LOCK);
702 VnCancelReservation_r(vnp);
708 /* no such vnode in the cache */
710 vnp = VGetFreeVnode_r(vcp);
712 /* Initialize the header fields so no one allocates another
713 * vnode with the same number */
714 Vn_id(vnp) = vnodeNumber;
715 VnCreateReservation_r(vnp);
716 AddToVVnList(vp, vnp);
717 #ifdef AFS_DEMAND_ATTACH_FS
721 /* This will never block (guaranteed by check in VGetFreeVnode_r() */
722 VnLock(vnp, WRITE_LOCK, VOL_LOCK_HELD, WILL_NOT_DEADLOCK);
724 #ifdef AFS_DEMAND_ATTACH_FS
725 VnChangeState_r(vnp, VN_STATE_ALLOC);
728 /* Sanity check: is this vnode really not in use? */
731 IHandle_t *ihP = vp->vnodeIndex[class].handle;
733 afs_foff_t off = vnodeIndexOffset(vcp, vnodeNumber);
736 /* XXX we have a potential race here if two threads
737 * allocate new vnodes at the same time, and they
738 * both decide it's time to extend the index
741 #ifdef AFS_DEMAND_ATTACH_FS
743 * this race has been eliminated for the DAFS case
744 * using exclusive state VOL_STATE_VNODE_ALLOC
746 * if this becomes a bottleneck, there are ways to
747 * improve parallelism for this code path
748 * -- tkeiser 11/28/2007
750 VCreateReservation_r(vp);
751 VWaitExclusiveState_r(vp);
752 vol_state_save = VChangeState_r(vp, VOL_STATE_VNODE_ALLOC);
758 Log("VAllocVnode: can't open index file!\n");
760 goto error_encountered;
762 if ((size = FDH_SIZE(fdP)) < 0) {
763 Log("VAllocVnode: can't stat index file!\n");
765 goto error_encountered;
767 if (FDH_SEEK(fdP, off, SEEK_SET) < 0) {
768 Log("VAllocVnode: can't seek on index file!\n");
770 goto error_encountered;
772 if (off + vcp->diskSize <= size) {
773 if (FDH_READ(fdP, &vnp->disk, vcp->diskSize) != vcp->diskSize) {
774 Log("VAllocVnode: can't read index file!\n");
776 goto error_encountered;
778 if (vnp->disk.type != vNull) {
779 Log("VAllocVnode: addled bitmap or index!\n");
781 goto error_encountered;
784 /* growing file - grow in a reasonable increment */
785 char *buf = (char *)malloc(16 * 1024);
787 Log("VAllocVnode: can't grow vnode index: out of memory\n");
789 goto error_encountered;
791 memset(buf, 0, 16 * 1024);
792 if ((FDH_WRITE(fdP, buf, 16 * 1024)) != 16 * 1024) {
793 Log("VAllocVnode: can't grow vnode index: write failed\n");
796 goto error_encountered;
803 #ifdef AFS_DEMAND_ATTACH_FS
804 VChangeState_r(vp, vol_state_save);
805 VCancelReservation_r(vp);
812 * close the file handle
814 * invalidate the vnode
815 * free up the bitmap entry (although salvager should take care of it)
817 * drop vnode lock and refs
822 VFreeBitMapEntry_r(&tmp, &vp->vnodeIndex[class], bitNumber);
823 VInvalidateVnode_r(vnp);
824 VnUnlock(vnp, WRITE_LOCK);
825 VnCancelReservation_r(vnp);
826 #ifdef AFS_DEMAND_ATTACH_FS
827 VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0);
828 VCancelReservation_r(vp);
830 VForceOffline_r(vp, 0);
835 VNLog(4, 2, vnodeNumber, (intptr_t)vnp, 0, 0);
836 #ifndef AFS_DEMAND_ATTACH_FS
841 VNLog(5, 1, (intptr_t)vnp, 0, 0, 0);
842 memset(&vnp->disk, 0, sizeof(vnp->disk));
843 vnp->changed_newTime = 0; /* set this bit when vnode is updated */
844 vnp->changed_oldTime = 0; /* set this on CopyOnWrite. */
846 vnp->disk.vnodeMagic = vcp->magic;
847 vnp->disk.type = type;
848 vnp->disk.uniquifier = unique;
851 vp->header->diskstuff.filecount++;
852 #ifdef AFS_DEMAND_ATTACH_FS
853 VnChangeState_r(vnp, VN_STATE_EXCLUSIVE);
859 * load a vnode from disk.
861 * @param[out] ec client error code return
862 * @param[in] vp volume object pointer
863 * @param[in] vnp vnode object pointer
864 * @param[in] vcp vnode class info object pointer
865 * @param[in] class vnode class enumeration
867 * @pre vnode is registered in appropriate data structures;
868 * caller holds a ref on vnode; VOL_LOCK is held
870 * @post vnode data is loaded from disk.
871 * vnode state is set to VN_STATE_ONLINE.
872 * on failure, vnode is invalidated.
874 * @internal vnode package internal use only
877 VnLoad(Error * ec, Volume * vp, Vnode * vnp,
878 struct VnodeClassInfo * vcp, VnodeClass class)
880 /* vnode not cached */
884 IHandle_t *ihP = vp->vnodeIndex[class].handle;
890 #ifdef AFS_DEMAND_ATTACH_FS
891 VnChangeState_r(vnp, VN_STATE_LOAD);
894 /* This will never block */
895 VnLock(vnp, WRITE_LOCK, VOL_LOCK_HELD, WILL_NOT_DEADLOCK);
900 Log("VnLoad: can't open index dev=%u, i=%s\n", vp->device,
901 PrintInode(NULL, vp->vnodeIndex[class].handle->ih_ino));
903 goto error_encountered_nolock;
904 } else if (FDH_SEEK(fdP, vnodeIndexOffset(vcp, Vn_id(vnp)), SEEK_SET)
906 Log("VnLoad: can't seek on index file vn=%u\n", Vn_id(vnp));
908 goto error_encountered_nolock;
909 } else if ((nBytes = FDH_READ(fdP, (char *)&vnp->disk, vcp->diskSize))
911 /* Don't take volume off line if the inumber is out of range
912 * or the inode table is full. */
913 if (nBytes == BAD_IGET) {
914 Log("VnLoad: bad inumber %s\n",
915 PrintInode(NULL, vp->vnodeIndex[class].handle->ih_ino));
918 } else if (nBytes == -1 && errno == EIO) {
919 /* disk error; salvage */
920 Log("VnLoad: Couldn't read vnode %u, volume %u (%s); volume needs salvage\n", Vn_id(vnp), V_id(vp), V_name(vp));
922 /* vnode is not allocated */
924 Log("VnLoad: Couldn't read vnode %u, volume %u (%s); read %d bytes, errno %d\n",
925 Vn_id(vnp), V_id(vp), V_name(vp), (int)nBytes, errno);
929 goto error_encountered_nolock;
934 /* Quick check to see that the data is reasonable */
935 if (vnp->disk.vnodeMagic != vcp->magic || vnp->disk.type == vNull) {
936 if (vnp->disk.type == vNull) {
940 struct vnodeIndex *index = &vp->vnodeIndex[class];
941 unsigned int bitNumber = vnodeIdToBitNumber(Vn_id(vnp));
942 unsigned int offset = bitNumber >> 3;
944 /* Test to see if vnode number is valid. */
945 if ((offset >= index->bitmapSize)
946 || ((*(index->bitmap + offset) & (1 << (bitNumber & 0x7)))
948 Log("VnLoad: Request for unallocated vnode %u, volume %u (%s) denied.\n", Vn_id(vnp), V_id(vp), V_name(vp));
952 Log("VnLoad: Bad magic number, vnode %u, volume %u (%s); volume needs salvage\n", Vn_id(vnp), V_id(vp), V_name(vp));
955 goto error_encountered;
958 IH_INIT(vnp->handle, V_device(vp), V_parentId(vp), VN_GET_INO(vnp));
959 VnUnlock(vnp, WRITE_LOCK);
960 #ifdef AFS_DEMAND_ATTACH_FS
961 VnChangeState_r(vnp, VN_STATE_ONLINE);
966 error_encountered_nolock:
968 FDH_REALLYCLOSE(fdP);
974 #ifdef AFS_DEMAND_ATTACH_FS
975 VRequestSalvage_r(&error, vp, SALVSYNC_ERROR, 0);
977 VForceOffline_r(vp, 0);
984 VInvalidateVnode_r(vnp);
985 VnUnlock(vnp, WRITE_LOCK);
989 * store a vnode to disk.
991 * @param[out] ec error code output
992 * @param[in] vp volume object pointer
993 * @param[in] vnp vnode object pointer
994 * @param[in] vcp vnode class info object pointer
995 * @param[in] class vnode class enumeration
997 * @pre VOL_LOCK held.
998 * caller holds refs to volume and vnode.
999 * DAFS: caller is responsible for performing state sanity checks.
1001 * @post vnode state is stored to disk.
1003 * @internal vnode package internal use only
1006 VnStore(Error * ec, Volume * vp, Vnode * vnp,
1007 struct VnodeClassInfo * vcp, VnodeClass class)
1011 IHandle_t *ihP = vp->vnodeIndex[class].handle;
1013 #ifdef AFS_DEMAND_ATTACH_FS
1014 VnState vn_state_save;
1019 #ifdef AFS_DEMAND_ATTACH_FS
1020 vn_state_save = VnChangeState_r(vnp, VN_STATE_STORE);
1023 offset = vnodeIndexOffset(vcp, Vn_id(vnp));
1027 Log("VnStore: can't open index file!\n");
1028 goto error_encountered;
1030 if (FDH_SEEK(fdP, offset, SEEK_SET) < 0) {
1031 Log("VnStore: can't seek on index file! fdp=%"AFS_PTR_FMT
1032 " offset=%d, errno=%d\n",
1033 fdP, (int) offset, errno);
1034 goto error_encountered;
1037 nBytes = FDH_WRITE(fdP, &vnp->disk, vcp->diskSize);
1038 if (nBytes != vcp->diskSize) {
1039 /* Don't force volume offline if the inumber is out of
1040 * range or the inode table is full.
1042 FDH_REALLYCLOSE(fdP);
1043 if (nBytes == BAD_IGET) {
1044 Log("VnStore: bad inumber %s\n",
1046 vp->vnodeIndex[class].handle->ih_ino));
1049 #ifdef AFS_DEMAND_ATTACH_FS
1050 VnChangeState_r(vnp, VN_STATE_ERROR);
1053 Log("VnStore: Couldn't write vnode %u, volume %u (%s) (error %d)\n", Vn_id(vnp), V_id(Vn_volume(vnp)), V_name(Vn_volume(vnp)), (int)nBytes);
1054 #ifdef AFS_DEMAND_ATTACH_FS
1055 goto error_encountered;
1058 VForceOffline_r(vp, 0);
1068 #ifdef AFS_DEMAND_ATTACH_FS
1069 VnChangeState_r(vnp, vn_state_save);
1074 #ifdef AFS_DEMAND_ATTACH_FS
1075 /* XXX instead of dumping core, let's try to request a salvage
1076 * and just fail the putvnode */
1080 VnChangeState_r(vnp, VN_STATE_ERROR);
1081 VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0);
1088 * get a handle to a vnode object.
1090 * @param[out] ec error code
1091 * @param[in] vp volume object
1092 * @param[in] vnodeNumber vnode id
1093 * @param[in] locktype type of lock to acquire
1095 * @return vnode object pointer
1100 VGetVnode(Error * ec, Volume * vp, VnodeId vnodeNumber, int locktype)
1101 { /* READ_LOCK or WRITE_LOCK, as defined in lock.h */
1104 retVal = VGetVnode_r(ec, vp, vnodeNumber, locktype);
1110 * get a handle to a vnode object.
1112 * @param[out] ec error code
1113 * @param[in] vp volume object
1114 * @param[in] vnodeNumber vnode id
1115 * @param[in] locktype type of lock to acquire
1117 * @return vnode object pointer
1119 * @internal vnode package internal use only
1121 * @pre VOL_LOCK held.
1122 * heavyweight ref held on volume object.
1125 VGetVnode_r(Error * ec, Volume * vp, VnodeId vnodeNumber, int locktype)
1126 { /* READ_LOCK or WRITE_LOCK, as defined in lock.h */
1129 struct VnodeClassInfo *vcp;
1133 if (vnodeNumber == 0) {
1138 VNLog(100, 1, vnodeNumber, 0, 0, 0);
1140 #ifdef AFS_DEMAND_ATTACH_FS
1142 * once a volume has entered an error state, don't permit
1143 * further operations to proceed
1144 * -- tkeiser 11/21/2007
1146 VWaitExclusiveState_r(vp);
1147 if (VIsErrorState(V_attachState(vp))) {
1148 /* XXX is VSALVAGING acceptable here? */
1154 if (programType == fileServer && !V_inUse(vp)) {
1155 *ec = (vp->specialStatus ? vp->specialStatus : VOFFLINE);
1157 /* If the volume is VBUSY (being cloned or dumped) and this is
1158 * a READ operation, then don't fail.
1160 if ((*ec != VBUSY) || (locktype != READ_LOCK)) {
1165 class = vnodeIdToClass(vnodeNumber);
1166 vcp = &VnodeClassInfo[class];
1167 if (locktype == WRITE_LOCK && !VolumeWriteable(vp)) {
1168 *ec = (bit32) VREADONLY;
1172 if (locktype == WRITE_LOCK && programType == fileServer) {
1173 VAddToVolumeUpdateList_r(ec, vp);
1181 /* See whether the vnode is in the cache. */
1182 vnp = VLookupVnode(vp, vnodeNumber);
1184 /* vnode is in cache */
1186 VNLog(101, 2, vnodeNumber, (intptr_t)vnp, 0, 0);
1187 VnCreateReservation_r(vnp);
1189 #ifdef AFS_DEMAND_ATTACH_FS
1191 * this is the one DAFS case where we may run into contention.
1192 * here's the basic control flow:
1194 * if locktype is READ_LOCK:
1195 * wait until vnode is not exclusive
1196 * set to VN_STATE_READ
1197 * increment read count
1200 * wait until vnode is quiescent
1201 * set to VN_STATE_EXCLUSIVE
1204 if (locktype == READ_LOCK) {
1205 VnWaitExclusiveState_r(vnp);
1207 VnWaitQuiescent_r(vnp);
1210 if (VnIsErrorState(Vn_state(vnp))) {
1211 VnCancelReservation_r(vnp);
1215 #endif /* AFS_DEMAND_ATTACH_FS */
1217 /* vnode not cached */
1219 /* Not in cache; tentatively grab most distantly used one from the LRU
1222 vnp = VGetFreeVnode_r(vcp);
1225 vnp->changed_newTime = vnp->changed_oldTime = 0;
1227 Vn_id(vnp) = vnodeNumber;
1228 VnCreateReservation_r(vnp);
1229 AddToVVnList(vp, vnp);
1230 #ifdef AFS_DEMAND_ATTACH_FS
1235 * XXX for non-DAFS, there is a serious
1236 * race condition here:
1238 * two threads can race to load a vnode. the net
1239 * result is two struct Vnodes can be allocated
1240 * and hashed, which point to the same underlying
1241 * disk data store. conflicting vnode locks can
1242 * thus be held concurrently.
1244 * for non-DAFS to be safe, VOL_LOCK really shouldn't
1245 * be dropped in VnLoad. Of course, this would likely
1246 * lead to an unacceptable slow-down.
1249 VnLoad(ec, vp, vnp, vcp, class);
1251 VnCancelReservation_r(vnp);
1254 #ifndef AFS_DEMAND_ATTACH_FS
1259 * there is no possibility for contention. we "own" this vnode.
1265 * it is imperative that nothing drop vol lock between here
1266 * and the VnBeginRead/VnChangeState stanza below
1269 VnLock(vnp, locktype, VOL_LOCK_HELD, MIGHT_DEADLOCK);
1271 /* Check that the vnode hasn't been removed while we were obtaining
1273 VNLog(102, 2, vnodeNumber, (intptr_t) vnp, 0, 0);
1274 if ((vnp->disk.type == vNull) || (Vn_cacheCheck(vnp) == 0)) {
1275 VnUnlock(vnp, locktype);
1276 VnCancelReservation_r(vnp);
1278 /* vnode is labelled correctly by now, so we don't have to invalidate it */
1282 #ifdef AFS_DEMAND_ATTACH_FS
1283 if (locktype == READ_LOCK) {
1286 VnChangeState_r(vnp, VN_STATE_EXCLUSIVE);
1290 if (programType == fileServer)
1291 VBumpVolumeUsage_r(Vn_volume(vnp)); /* Hack; don't know where it should be
1292 * called from. Maybe VGetVolume */
1297 int TrustVnodeCacheEntry = 1;
1298 /* This variable is bogus--when it's set to 0, the hash chains fill
1299 up with multiple versions of the same vnode. Should fix this!! */
1301 VPutVnode(Error * ec, Vnode * vnp)
1304 VPutVnode_r(ec, vnp);
1309 * put back a handle to a vnode object.
1311 * @param[out] ec client error code
1312 * @param[in] vnp vnode object pointer
1314 * @pre VOL_LOCK held.
1315 * ref held on vnode.
1317 * @post ref dropped on vnode.
1318 * if vnode was modified or deleted, it is written out to disk
1319 * (assuming a write lock was held).
1321 * @internal volume package internal use only
1324 VPutVnode_r(Error * ec, Vnode * vnp)
1328 struct VnodeClassInfo *vcp;
1331 assert(Vn_refcount(vnp) != 0);
1332 class = vnodeIdToClass(Vn_id(vnp));
1333 vcp = &VnodeClassInfo[class];
1334 assert(vnp->disk.vnodeMagic == vcp->magic);
1335 VNLog(200, 2, Vn_id(vnp), (intptr_t) vnp, 0, 0);
1337 #ifdef AFS_DEMAND_ATTACH_FS
1338 writeLocked = (Vn_state(vnp) == VN_STATE_EXCLUSIVE);
1340 writeLocked = WriteLocked(&vnp->lock);
1345 #ifdef AFS_PTHREAD_ENV
1346 pthread_t thisProcess = pthread_self();
1347 #else /* AFS_PTHREAD_ENV */
1348 PROCESS thisProcess;
1349 LWP_CurrentProcess(&thisProcess);
1350 #endif /* AFS_PTHREAD_ENV */
1351 VNLog(201, 2, (intptr_t) vnp,
1352 ((vnp->changed_newTime) << 1) | ((vnp->
1353 changed_oldTime) << 1) | vnp->
1355 if (thisProcess != vnp->writer)
1356 Abort("VPutVnode: Vnode at %"AFS_PTR_FMT" locked by another process!\n",
1360 if (vnp->changed_oldTime || vnp->changed_newTime || vnp->delete) {
1361 Volume *vp = Vn_volume(vnp);
1362 afs_uint32 now = FT_ApproxTime();
1363 assert(Vn_cacheCheck(vnp) == vp->cacheCheck);
1366 /* No longer any directory entries for this vnode. Free the Vnode */
1367 memset(&vnp->disk, 0, sizeof(vnp->disk));
1368 /* delete flag turned off further down */
1369 VNLog(202, 2, Vn_id(vnp), (intptr_t) vnp, 0, 0);
1370 } else if (vnp->changed_newTime) {
1371 vnp->disk.serverModifyTime = now;
1373 if (vnp->changed_newTime)
1375 V_updateDate(vp) = vp->updateTime = now;
1376 if(V_volUpCounter(vp)<MAXINT)
1377 V_volUpCounter(vp)++;
1380 /* The vnode has been changed. Write it out to disk */
1382 #ifdef AFS_DEMAND_ATTACH_FS
1383 VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0);
1385 assert(V_needsSalvaged(vp));
1389 VnStore(ec, vp, vnp, vcp, class);
1391 /* If the vnode is to be deleted, and we wrote the vnode out,
1392 * free its bitmap entry. Do after the vnode is written so we
1393 * don't allocate from bitmap before the vnode is written
1394 * (doing so could cause a "addled bitmap" message).
1396 if (vnp->delete && !*ec) {
1397 if (Vn_volume(vnp)->header->diskstuff.filecount-- < 1)
1398 Vn_volume(vnp)->header->diskstuff.filecount = 0;
1399 VFreeBitMapEntry_r(ec, &vp->vnodeIndex[class],
1400 vnodeIdToBitNumber(Vn_id(vnp)));
1404 vnp->changed_newTime = vnp->changed_oldTime = 0;
1406 #ifdef AFS_DEMAND_ATTACH_FS
1407 VnChangeState_r(vnp, VN_STATE_ONLINE);
1409 } else { /* Not write locked */
1410 if (vnp->changed_newTime || vnp->changed_oldTime || vnp->delete)
1412 ("VPutVnode: Change or delete flag for vnode "
1413 "%"AFS_PTR_FMT" is set but vnode is not write locked!\n",
1415 #ifdef AFS_DEMAND_ATTACH_FS
1420 /* Do not look at disk portion of vnode after this point; it may
1421 * have been deleted above */
1423 VnUnlock(vnp, ((writeLocked) ? WRITE_LOCK : READ_LOCK));
1424 VnCancelReservation_r(vnp);
1428 * Make an attempt to convert a vnode lock from write to read.
1429 * Do nothing if the vnode isn't write locked or the vnode has
1433 VVnodeWriteToRead(Error * ec, Vnode * vnp)
1437 retVal = VVnodeWriteToRead_r(ec, vnp);
1443 * convert vnode handle from mutually exclusive to shared access.
1445 * @param[out] ec client error code
1446 * @param[in] vnp vnode object pointer
1448 * @return unspecified use (see out argument 'ec' for error code return)
1450 * @pre VOL_LOCK held.
1451 * ref held on vnode.
1452 * write lock held on vnode.
1454 * @post read lock held on vnode.
1455 * if vnode was modified, it has been written to disk.
1457 * @internal volume package internal use only
/*
 * VVnodeWriteToRead_r -- convert the hold on vnp from write (exclusive) to
 * read (shared), storing pending changes before the lock is converted.
 *
 * Reviewer notes on what this excerpt shows. Several short lines (braces,
 * early returns, and the partner lines of some #ifdef directives) are not
 * visible here, so nesting is inferred -- confirm against the full file:
 *  - Sanity checks: the vnode must have a nonzero refcount and its on-disk
 *    vnodeMagic must equal the magic of its vnode class.
 *  - writeLocked is computed from Vn_state(vnp) == VN_STATE_EXCLUSIVE in
 *    AFS_DEMAND_ATTACH_FS builds; the WriteLocked(&vnp->lock) form is
 *    presumably the alternative for other builds.
 *  - If changed_oldTime or changed_newTime is set, the vnode is written with
 *    VnStore() and both flags are cleared afterwards.
 *  - Finally the vnode state is set to VN_STATE_ONLINE (DAFS builds) and the
 *    lock is converted with ConvertWriteToReadLock().
 */
1460 VVnodeWriteToRead_r(Error * ec, Vnode * vnp)
1464 struct VnodeClassInfo *vcp;
1465 #ifdef AFS_PTHREAD_ENV
1466 pthread_t thisProcess;
1467 #else /* AFS_PTHREAD_ENV */
1468 PROCESS thisProcess;
1469 #endif /* AFS_PTHREAD_ENV */
/* A reference must be held, and the disk image must carry the magic value
 * of the class derived from the vnode id. */
1472 assert(Vn_refcount(vnp) != 0);
1473 class = vnodeIdToClass(Vn_id(vnp));
1474 vcp = &VnodeClassInfo[class];
1475 assert(vnp->disk.vnodeMagic == vcp->magic);
1476 VNLog(300, 2, Vn_id(vnp), (intptr_t) vnp, 0, 0);
1478 #ifdef AFS_DEMAND_ATTACH_FS
1479 writeLocked = (Vn_state(vnp) == VN_STATE_EXCLUSIVE);
1481 writeLocked = WriteLocked(&vnp->lock);
/* NOTE(review): both changed flags are shifted left by 1 in the logged
 * value below, so they occupy the same bit -- confirm whether distinct
 * shift amounts were intended. */
1488 VNLog(301, 2, (intptr_t) vnp,
1489 ((vnp->changed_newTime) << 1) | ((vnp->
1490 changed_oldTime) << 1) | vnp->
/* Identify the calling thread (pthread build) or LWP process (otherwise) so
 * it can be compared with the writer recorded on the vnode. */
1494 #ifdef AFS_PTHREAD_ENV
1495 thisProcess = pthread_self();
1496 #else /* AFS_PTHREAD_ENV */
1497 LWP_CurrentProcess(&thisProcess);
1498 #endif /* AFS_PTHREAD_ENV */
/* NOTE(review): the Abort text is prefixed VPutVnode although this function
 * is VVnodeWriteToRead_r. Also, in the pthread build thisProcess is a
 * pthread_t compared with != ; POSIX defines pthread_equal() for comparing
 * thread IDs. */
1499 if (thisProcess != vnp->writer)
1500 Abort("VPutVnode: Vnode at %"AFS_PTR_FMT
1501 " locked by another process!\n", vnp);
1506 if (vnp->changed_oldTime || vnp->changed_newTime) {
1507 Volume *vp = Vn_volume(vnp);
1508 afs_uint32 now = FT_ApproxTime();
1509 assert(Vn_cacheCheck(vnp) == vp->cacheCheck);
/* NOTE(review): the same changed_newTime test guards both timestamp
 * assignments below; changed_oldTime alone updates neither of them. */
1510 if (vnp->changed_newTime)
1511 vnp->disk.serverModifyTime = now;
1512 if (vnp->changed_newTime)
1513 V_updateDate(vp) = vp->updateTime = now;
1515 /* The inode has been changed. Write it out to disk */
/* NOTE(review): the condition that guards the salvage request and the
 * assert below is not visible in this excerpt. */
1517 #ifdef AFS_DEMAND_ATTACH_FS
1518 VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0);
1520 assert(V_needsSalvaged(vp));
1524 VnStore(ec, vp, vnp, vcp, class);
1527 vnp->changed_newTime = vnp->changed_oldTime = 0;
1531 #ifdef AFS_DEMAND_ATTACH_FS
1532 VnChangeState_r(vnp, VN_STATE_ONLINE);
1535 ConvertWriteToReadLock(&vnp->lock);
/*
 * Sizing constants for the ihandle pointer vector built by
 * VInvalidateVnodesByVolume_r. Both values are element counts (numbers of
 * IHandle_t * slots): the allocation sites multiply them by
 * sizeof(IHandle_t *), so they are not byte sizes.
 */
1541 * initial size of ihandle pointer vector.
1543 * @see VInvalidateVnodesByVolume_r
1545 #define IH_VEC_BASE_SIZE 256
1548 * increment amount for growing ihandle pointer vector.
1550 * @see VInvalidateVnodesByVolume_r
1552 #define IH_VEC_INCREMENT 256
1555 * Compile list of ihandles to be released/reallyclosed at a later time.
1557 * @param[in] vp volume object pointer
1558 * @param[out] vec_out vector of ihandle pointers to be released/reallyclosed
1559 * @param[out] vec_len_out number of valid elements in ihandle vector
1561 * @pre - VOL_LOCK is held
1562 * - volume is in appropriate exclusive state (e.g. VOL_STATE_VNODE_CLOSE,
1563 * VOL_STATE_VNODE_RELEASE)
1565 * @post - all vnodes on VVn list are invalidated
1566 * - ih_vec is populated with all valid ihandles
1568 * @return operation status
1570 * @retval ENOMEM out of memory
1572 * @todo we should handle out of memory conditions more gracefully.
1574 * @internal vnode package internal use only
/*
 * VInvalidateVnodesByVolume_r -- scan vp->vnode_list and copy every non-NULL
 * vnp->handle into a heap-allocated vector of IHandle_t pointers. The scan
 * also calls DeleteFromVVnList() and VInvalidateVnode_r() on vnodes it
 * visits (these appear to apply to every visited vnode; the brace nesting is
 * not visible in this excerpt).
 *
 * Reviewer notes on what this excerpt shows. Several short lines are not
 * visible here -- confirm details against the full file:
 *  - The vector starts with room for IH_VEC_BASE_SIZE pointers (malloc) and
 *    is enlarged by IH_VEC_INCREMENT pointers per realloc call.
 *  - The realloc result is placed in ih_vec_new and tested against NULL
 *    before it is assigned to ih_vec, so ih_vec is not overwritten directly
 *    by the realloc call. The body of the failure branch is not visible.
 *  - In AFS_DEMAND_ATTACH_FS builds the scan jumps back to restart_traversal
 *    after the vector has been enlarged.
 *  - The statements inside the AFS_DEMAND_ATTACH_FS guards around malloc and
 *    realloc, the check of the malloc result, and the stores through vec_out
 *    and vec_len_out are not visible in this excerpt.
 */
1577 VInvalidateVnodesByVolume_r(Volume * vp,
1578 IHandle_t *** vec_out,
1579 size_t * vec_len_out)
1583 size_t i = 0, vec_len;
1584 IHandle_t **ih_vec, **ih_vec_new;
1586 #ifdef AFS_DEMAND_ATTACH_FS
1588 #endif /* AFS_DEMAND_ATTACH_FS */
1590 vec_len = IH_VEC_BASE_SIZE;
1591 ih_vec = malloc(sizeof(IHandle_t *) * vec_len);
1592 #ifdef AFS_DEMAND_ATTACH_FS
1599 * Traverse the volume's vnode list. Pull all the ihandles out into a
1600 * thread-private array for later asynchronous processing.
1602 #ifdef AFS_DEMAND_ATTACH_FS
1605 for (queue_Scan(&vp->vnode_list, vnp, nvnp, Vnode)) {
1606 if (vnp->handle != NULL) {
1608 #ifdef AFS_DEMAND_ATTACH_FS
1611 vec_len += IH_VEC_INCREMENT;
1612 ih_vec_new = realloc(ih_vec, sizeof(IHandle_t *) * vec_len);
1613 #ifdef AFS_DEMAND_ATTACH_FS
1616 if (ih_vec_new == NULL) {
1620 ih_vec = ih_vec_new;
1621 #ifdef AFS_DEMAND_ATTACH_FS
1623 * Theoretically, the volume's VVn list should not change
1624 * because the volume is in an exclusive state. For the
1625 * sake of safety, we will restart the traversal from the
1626 * the beginning (which is not expensive because we're
1627 * deleting the items from the list as we go).
1629 goto restart_traversal;
1632 ih_vec[i++] = vnp->handle;
1635 DeleteFromVVnList(vnp);
1636 VInvalidateVnode_r(vnp);
1646 /* VCloseVnodeFiles_r - called when a volume is going off line. All open
1647 * files for vnodes in that volume are closed. This might be excessive,
1648 * since we may only be taking one volume of a volume group offline.
/*
 * VCloseVnodeFiles_r -- invalidate the vnodes of vp through
 * VInvalidateVnodesByVolume_r, then call IH_REALLYCLOSE on each of the
 * vec_len collected ihandles in ih_vec.
 *
 * In AFS_DEMAND_ATTACH_FS builds the volume is first moved to
 * VOL_STATE_VNODE_CLOSE. The value returned by VChangeState_r is kept in
 * vol_state_save and passed back to VChangeState_r at the end (presumably
 * restoring the prior state -- confirm).
 *
 * NOTE(review): the call to VInvalidateVnodesByVolume_r sits inside assert().
 * If assert here is the standard C macro, building with NDEBUG would remove
 * the call together with its side effects. The file may use a project
 * specific assert instead -- confirm.
 * NOTE(review): the remaining arguments of that call, the statements inside
 * the AFS_DEMAND_ATTACH_FS guards around the loop, and the release of ih_vec
 * itself are not visible in this excerpt.
 */
1651 VCloseVnodeFiles_r(Volume * vp)
1653 #ifdef AFS_DEMAND_ATTACH_FS
1654 VolState vol_state_save;
1656 IHandle_t ** ih_vec;
1659 #ifdef AFS_DEMAND_ATTACH_FS
1660 vol_state_save = VChangeState_r(vp, VOL_STATE_VNODE_CLOSE);
1661 #endif /* AFS_DEMAND_ATTACH_FS */
1663 /* XXX need better error handling here */
1664 assert(VInvalidateVnodesByVolume_r(vp,
1670 * now we drop VOL_LOCK while we perform some potentially very
1671 * expensive operations in the background
1673 #ifdef AFS_DEMAND_ATTACH_FS
1677 for (i = 0; i < vec_len; i++) {
1678 IH_REALLYCLOSE(ih_vec[i]);
1683 #ifdef AFS_DEMAND_ATTACH_FS
1685 VChangeState_r(vp, vol_state_save);
1686 #endif /* AFS_DEMAND_ATTACH_FS */
1691 * shut down all vnode cache state for a given volume.
1693 * @param[in] vp volume object pointer
1695 * @pre VOL_LOCK is held
1697 * @post all file descriptors closed.
1698 * all inode handles released.
1699 * all vnode cache objects disassociated from volume.
1701 * @note for DAFS, these operations are performed outside the vol glock under
1702 * volume exclusive state VOL_STATE_VNODE_RELEASE. Please further note
1703 * that it would be a bug to acquire and release a volume reservation
1704 * during this exclusive operation. This is due to the fact that we are
1705 * generally called during the refcount 1->0 transition.
1707 * @todo we should handle failures in VInvalidateVnodesByVolume_r more
1710 * @see VInvalidateVnodesByVolume_r
1712 * @internal this routine is internal to the volume package
/*
 * VReleaseVnodeFiles_r -- invalidate the vnodes of vp through
 * VInvalidateVnodesByVolume_r, then call IH_RELEASE on each of the vec_len
 * collected ihandles in ih_vec.
 *
 * In AFS_DEMAND_ATTACH_FS builds the volume is first moved to
 * VOL_STATE_VNODE_RELEASE. The value returned by VChangeState_r is kept in
 * vol_state_save and passed back to VChangeState_r at the end (presumably
 * restoring the prior state -- confirm).
 *
 * NOTE(review): the call to VInvalidateVnodesByVolume_r sits inside assert().
 * If assert here is the standard C macro, building with NDEBUG would remove
 * the call together with its side effects. The file may use a project
 * specific assert instead -- confirm.
 * NOTE(review): the remaining arguments of that call, the statements inside
 * the AFS_DEMAND_ATTACH_FS guards around the loop, the release of ih_vec
 * itself, and the end of the function are not visible in this excerpt.
 */
1715 VReleaseVnodeFiles_r(Volume * vp)
1717 #ifdef AFS_DEMAND_ATTACH_FS
1718 VolState vol_state_save;
1720 IHandle_t ** ih_vec;
1723 #ifdef AFS_DEMAND_ATTACH_FS
1724 vol_state_save = VChangeState_r(vp, VOL_STATE_VNODE_RELEASE);
1725 #endif /* AFS_DEMAND_ATTACH_FS */
1727 /* XXX need better error handling here */
1728 assert(VInvalidateVnodesByVolume_r(vp,
1734 * now we drop VOL_LOCK while we perform some potentially very
1735 * expensive operations in the background
1737 #ifdef AFS_DEMAND_ATTACH_FS
1741 for (i = 0; i < vec_len; i++) {
1742 IH_RELEASE(ih_vec[i]);
1747 #ifdef AFS_DEMAND_ATTACH_FS
1749 VChangeState_r(vp, vol_state_save);
1750 #endif /* AFS_DEMAND_ATTACH_FS */