2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 /* 1/1/89: NB: this stuff is all going to be replaced. Don't take it too seriously */
17 Institution: The Information Technology Center, Carnegie-Mellon University
21 #include <afs/param.h>
23 #include <afs/afsint.h>
26 #include <sys/param.h>
27 #if !defined(AFS_SGI_ENV)
30 #else /* AFS_OSF_ENV */
31 #ifdef AFS_VFSINCL_ENV
34 #include <sys/fs/ufs_fs.h>
38 #else /* AFS_VFSINCL_ENV */
39 #if !defined(AFS_AIX_ENV) && !defined(AFS_LINUX20_ENV)
42 #endif /* AFS_VFSINCL_ENV */
43 #endif /* AFS_OSF_ENV */
44 #endif /* AFS_SGI_ENV */
45 #endif /* AFS_NT40_ENV */
63 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
65 #include <sys/mnttab.h>
66 #include <sys/mntent.h>
72 #if defined(AFS_SGI_ENV)
75 #ifndef AFS_SGI_XFS_IOPS_ENV
76 #define ROOTINO EFS_ROOTINO
79 #include <sys/fs/efs.h>
81 #include "../sgiefs/efs.h" /* until 5.1 release */
84 #ifndef AFS_LINUX20_ENV
85 #include <fstab.h> /* Need to find in libc 5, present in libc 6 */
88 #endif /* AFS_SGI_ENV */
90 #endif /* AFS_HPUX_ENV */
94 #include <netinet/in.h>
99 #endif /* ITIMER_REAL */
100 #endif /* AFS_NT40_ENV */
101 #if defined(AFS_SUN5_ENV) || defined(AFS_NT40_ENV) || defined(AFS_LINUX20_ENV)
108 #include <afs/errors.h>
111 #include <afs/afssyscalls.h>
114 #include <afs/afsutil.h>
119 #include "partition.h"
120 #ifdef AFS_PTHREAD_ENV
122 #else /* AFS_PTHREAD_ENV */
123 #include "afs/assert.h"
124 #endif /* AFS_PTHREAD_ENV */
127 #if !defined(AFS_NT40_ENV) && !defined(AFS_NAMEI_ENV)
128 #include <afs/osi_inode.h>
134 #ifdef AFS_PTHREAD_ENV
/* Synchronization objects used only in pthread builds.  All four are
 * initialized (with default attributes) in VInitVolumePackage. */
/* Mutex handed to pthread_cond_wait() together with vol_put_volume_cond
 * in VGetVolume_r. */
135 pthread_mutex_t vol_glock_mutex;
136 pthread_mutex_t vol_attach_mutex;
/* Broadcast by VPutVolume_r and VForceOffline_r; VGetVolume_r waits on it
 * while a volume is going offline. */
137 pthread_cond_t vol_put_volume_cond;
138 pthread_cond_t vol_sleep_cond;
139 #endif /* AFS_PTHREAD_ENV */
142 extern void *calloc(), *realloc();
145 /* Forward declarations */
146 static Volume *attach2();
147 static void FreeVolume();
148 static void VScanUpdateList();
149 static void InitLRU();
150 static int GetVolumeHeader();
151 static void ReleaseVolumeHeader();
152 static void FreeVolumeHeader();
153 static void AddVolumeToHashTable();
154 static void DeleteVolumeFromHashTable();
155 static int VHold(Volume *vp);
156 static int VHold_r(Volume *vp);
157 static void GetBitmap(Error *ec, Volume *vp, VnodeClass class);
158 static void GetVolumePath(Error *ec, VolId volumeId, char **partitionp,
160 static void VReleaseVolumeHandles_r(Volume *vp);
161 static void VCloseVolumeHandles_r(Volume *vp);
163 int LogLevel; /* Vice loglevel--not defined as extern so that it will be
164 defined when not linked with vice, XXXX */
165 ProgramType programType; /* The type of program using the package */
/* Number of bytes by which VAllocBitmapEntry_r grows a vnode bitmap when no
 * free bit is found. */
168 #define VOLUME_BITMAP_GROWSIZE 16 /* bytes, => 128vnodes */
169 /* Must be a multiple of 4 (1 word) !!*/
170 #define VOLUME_HASH_TABLE_SIZE 128 /* Must be a power of 2!! */
/* Bucket index for a volume id: the id is masked with
 * VOLUME_HASH_TABLE_SIZE-1, which is why the table size must be a power
 * of 2.  NOTE: the macro argument is not parenthesized in the expansion. */
171 #define VOLUME_HASH(volumeId) (volumeId&(VOLUME_HASH_TABLE_SIZE-1))
/* Hash bucket heads; the volumes in a bucket are chained through
 * Volume.hashNext (see the walks in VShutdown_r and VGetVolume_r). */
172 private Volume *VolumeHashTable[VOLUME_HASH_TABLE_SIZE];
175 /* This macro is used where an ffs() call does not exist. Was in util/ffs.c */
178 afs_int32 ffs_tmp = x; \
179 if (ffs_tmp == 0) return(-1); \
181 for (ffs_i = 1;; ffs_i++) { \
182 if (ffs_tmp & 1) return(ffs_i); \
183 else ffs_tmp >>= 1; \
186 #endif /* !AFS_HAVE_FFS */
188 struct Lock vol_listLock; /* Lock obtained when listing volumes: prevents a volume from being missed if the volume is attached during a list volumes */
190 extern struct Lock FSYNC_handler_lock;
192 Volume *VAttachVolumeByName();
193 Volume *VAttachVolumeByName_r();
195 static int TimeZoneCorrection; /* Number of seconds west of GMT */
197 /* Common message used when the volume goes off line */
198 char *VSalvageMessage =
199 "Files in this volume are currently unavailable; call operations";
201 int VInit; /* 0 - uninitialized,
202 1 - initialized but not all volumes have been attached,
203 2 - initialized and all volumes have been attached,
204 3 - initialized, all volumes have been attached, and
205 VConnectFS() has completed. */
208 int VolumeCacheCheck; /* Incremented everytime a volume goes on line--
209 * used to stamp volume headers and in-core
210 * vnodes. When the volume goes on-line the
211 * vnode will be invalidated */
213 int VolumeCacheSize = 200, VolumeGets=0, VolumeReplacements=0, Vlooks = 0;
216 int VInitVolumePackage(ProgramType pt, int nLargeVnodes, int nSmallVnodes,
217 int connect, int volcache)
219 int errors = 0; /* Number of errors while finding vice partitions. */
225 #ifdef AFS_PTHREAD_ENV
226 assert(pthread_mutex_init(&vol_glock_mutex, NULL) == 0);
227 assert(pthread_mutex_init(&vol_attach_mutex, NULL) == 0);
228 assert(pthread_cond_init(&vol_put_volume_cond, NULL) == 0);
229 assert(pthread_cond_init(&vol_sleep_cond, NULL) == 0);
230 #else /* AFS_PTHREAD_ENV */
232 #endif /* AFS_PTHREAD_ENV */
233 Lock_Init(&vol_listLock);
234 Lock_Init(&FSYNC_handler_lock);
235 srandom(time(0)); /* For VGetVolumeInfo */
236 gettimeofday(&tv, &tz);
237 TimeZoneCorrection = tz.tz_minuteswest*60;
239 /* Ok, we have done enough initialization that fileserver can
240 * start accepting calls, even though the volumes may not be
241 * available just yet.
245 if (programType == fileServer) {
246 /* File server or "stand" */
250 if (volcache > VolumeCacheSize)
251 VolumeCacheSize = volcache;
252 InitLRU(VolumeCacheSize);
254 VInitVnodes(vLarge, nLargeVnodes);
255 VInitVnodes(vSmall, nSmallVnodes);
258 errors = VAttachPartitions();
262 if (programType == fileServer) {
265 struct DiskPartition *diskP;
268 /* Attach all the volumes in this partition */
269 for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
270 int nAttached = 0, nUnattached = 0;
271 dirp = opendir(VPartitionPath(diskP));
273 while (dp = readdir(dirp)) {
275 p = strrchr(dp->d_name, '.');
276 if (p != NULL && strcmp(p, VHDREXT) == 0) {
279 vp = VAttachVolumeByName(&error, diskP->name, dp->d_name,
281 (*(vp?&nAttached:&nUnattached))++;
282 if (error == VOFFLINE)
283 Log("Volume %u stays offline (/vice/offline/%s exists)\n",
284 VolumeNumber(dp->d_name), dp->d_name);
290 Log("Partition %s: attached %d volumes; %d volumes not attached\n",
291 diskP->name, nAttached, nUnattached);
296 VInit = 2; /* Initialized, and all volumes have been attached */
297 if (programType == volumeUtility && connect) {
299 Log("Unable to connect to file server; aborted\n");
306 /* This must be called by any volume utility which needs to run while the
307 file server is also running. This is separated from VInitVolumePackage so
308 that a utility can fork--and each of the children can independently
309 initialize communication with the file server */
314 retVal = VConnectFS_r();
319 int VConnectFS_r(void)
322 assert(VInit == 2 && programType == volumeUtility);
323 rc = FSYNC_clientInit();
329 void VDisconnectFS_r(void) {
330 assert(programType == volumeUtility);
335 void VDisconnectFS(void) {
341 void VShutdown_r(void)
344 register Volume *vp, *np;
345 register afs_int32 code;
347 Log("VShutdown: shutting down on-line volumes...\n");
348 for (i=0; i<VOLUME_HASH_TABLE_SIZE; i++) {
349 /* try to hold first volume in the hash table */
350 for(vp = VolumeHashTable[i]; vp; vp=vp->hashNext) {
352 if (code == 0) break; /* got it */
353 /* otherwise we go around again, trying another volume */
356 /* first compute np before releasing vp, in case vp disappears
357 * after releasing. Hold it, so it doesn't disappear. If we
358 * can't hold it, try the next one in the chain. Invariant
359 * at the top of this loop is that vp is held (has extra ref count).
361 for(np=vp->hashNext; np; np=np->hashNext) {
363 if (code == 0) break; /* got it */
365 /* next, take the volume offline (drops reference count) */
366 VOffline_r(vp, "File server was shut down");
367 vp = np; /* next guy to try */
370 Log("VShutdown: complete.\n");
381 static void ReadHeader(Error *ec, IHandle_t *h, char *to, int size,
382 int magic, int version)
384 struct versionStamp *vsn;
394 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
396 FDH_REALLYCLOSE(fdP);
399 vsn = (struct versionStamp *) to;
400 if (FDH_READ(fdP, to, size) != size || vsn->magic != magic) {
402 FDH_REALLYCLOSE(fdP);
407 /* Check is conditional, in case caller wants to inspect version himself */
408 if (version && vsn->version != version) {
413 /* VolumeHeaderToDisk
414 * Allows for storing 64 bit inode numbers in on-disk volume header
417 void VolumeHeaderToDisk(VolumeDiskHeader_t *dh, VolumeHeader_t *h)
420 bzero((char*)dh, sizeof(VolumeDiskHeader_t));
421 dh->stamp = h->stamp;
423 dh->parent = h->parent;
425 #ifdef AFS_64BIT_IOPS_ENV
426 dh->volumeInfo_lo = (afs_int32) h->volumeInfo & 0xffffffff;
427 dh->volumeInfo_hi = (afs_int32) (h->volumeInfo >> 32) & 0xffffffff;
428 dh->smallVnodeIndex_lo = (afs_int32) h->smallVnodeIndex & 0xffffffff;
429 dh->smallVnodeIndex_hi = (afs_int32) (h->smallVnodeIndex >> 32) & 0xffffffff;
430 dh->largeVnodeIndex_lo = (afs_int32) h->largeVnodeIndex & 0xffffffff;
431 dh->largeVnodeIndex_hi = (afs_int32) (h->largeVnodeIndex >> 32) & 0xffffffff;
432 dh->linkTable_lo = (afs_int32) h->linkTable & 0xffffffff;
433 dh->linkTable_hi = (afs_int32) (h->linkTable >> 32) & 0xffffffff;
435 dh->volumeInfo_lo = h->volumeInfo;
436 dh->smallVnodeIndex_lo = h->smallVnodeIndex;
437 dh->largeVnodeIndex_lo = h->largeVnodeIndex;
438 dh->linkTable_lo = h->linkTable;
442 /* DiskToVolumeHeader
443 * Reads volume header file from disk, converting 64 bit inodes
444 * if required. Makes the assumption that AFS has *always*
445 * zero'd the volume header file so that high parts of inode
446 * numbers are 0 in older (SGI EFS) volume header files.
448 void DiskToVolumeHeader(VolumeHeader_t *h, VolumeDiskHeader_t *dh)
450 bzero((char*)h, sizeof(VolumeHeader_t));
451 h->stamp = dh->stamp;
453 h->parent = dh->parent;
455 #ifdef AFS_64BIT_IOPS_ENV
456 h->volumeInfo = dh->volumeInfo_lo | ((Inode)dh->volumeInfo_hi << 32);
458 h->smallVnodeIndex = dh->smallVnodeIndex_lo |
459 ((Inode)dh->smallVnodeIndex_hi << 32);
461 h->largeVnodeIndex = dh->largeVnodeIndex_lo |
462 ((Inode)dh->largeVnodeIndex_hi << 32);
463 h->linkTable = dh->linkTable_lo |
464 ((Inode)dh->linkTable_hi << 32);
466 h->volumeInfo = dh->volumeInfo_lo;
467 h->smallVnodeIndex = dh->smallVnodeIndex_lo;
468 h->largeVnodeIndex = dh->largeVnodeIndex_lo;
469 h->linkTable = dh->linkTable_lo;
474 void WriteVolumeHeader_r(ec, vp)
478 IHandle_t *h = V_diskDataHandle(vp);
488 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
490 FDH_REALLYCLOSE(fdP);
493 if (FDH_WRITE(fdP, (char*)&V_disk(vp), sizeof(V_disk(vp)))
494 != sizeof(V_disk(vp))) {
496 FDH_REALLYCLOSE(fdP);
502 /* Attach an existing volume, given its pathname, and return a
503 pointer to the volume header information. The volume also
504 normally goes online at this time. An offline volume
505 must be reattached to make it go online */
507 VAttachVolumeByName(ec, partition, name, mode)
516 retVal = VAttachVolumeByName_r(ec, partition, name, mode);
523 VAttachVolumeByName_r(ec, partition, name, mode)
532 struct VolumeDiskHeader diskHeader;
533 struct VolumeHeader iheader;
534 struct DiskPartition *partp;
538 if (programType == volumeUtility) {
540 VLockPartition_r(partition);
542 if (programType == fileServer) {
543 vp = VGetVolume_r(ec, VolumeNumber(name));
547 if (vp->specialStatus == VBUSY)
549 VDetachVolume_r(ec, vp);
553 if (!(partp = VGetPartition_r(partition, 0))) {
559 strcpy(path, VPartitionPath(partp));
563 if ((fd = open(path, O_RDONLY)) == -1 || fstat(fd,&status) == -1) {
569 n = read(fd, &diskHeader, sizeof (diskHeader));
572 if (n != sizeof (diskHeader) || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
573 Log("VAttachVolume: Error reading volume header %s\n", path);
577 if (diskHeader.stamp.version != VOLUMEHEADERVERSION) {
578 Log("VAttachVolume: Volume %s, version number is incorrect; volume needs salvaged\n",path);
583 DiskToVolumeHeader(&iheader, &diskHeader);
584 if (programType == volumeUtility && mode != V_SECRETLY) {
585 if (FSYNC_askfs(iheader.id, partition, FSYNC_NEEDVOLUME, mode)
587 Log("VAttachVolume: attach of volume %u apparently denied by file server\n",
589 *ec = VNOVOL; /* XXXX */
594 vp = attach2(ec, path, &iheader, partp, isbusy);
595 if (programType == volumeUtility && vp) {
596 /* duplicate computation in fssync.c about whether the server
597 * takes the volume offline or not. If the volume isn't
598 * offline, we must not return it when we detach the volume,
599 * or the server will abort */
600 if (mode == V_READONLY || (!VolumeWriteable(vp) && (mode==V_CLONE || mode==V_DUMP)))
601 vp->needsPutBack = 0;
603 vp->needsPutBack = 1;
605 /* OK, there's a problem here, but one that I don't know how to
606 * fix right now, and that I don't think should arise often.
607 * Basically, we should only put back this volume to the server if
608 * it was given to us by the server, but since we don't have a vp,
609 * we can't run the VolumeWriteable function to find out as we do
610 * above when computing vp->needsPutBack. So we send it back, but
611 * there's a path in VAttachVolume on the server which may abort
612 * if this volume doesn't have a header. Should be pretty rare
613 * for all of that to happen, but if it does, probably the right
614 * fix is for the server to allow the return of readonly volumes
615 * that it doesn't think are really checked out. */
616 if (programType == volumeUtility && vp == NULL && mode != V_SECRETLY) {
617 FSYNC_askfs(iheader.id, partition, FSYNC_ON, 0);
619 else if (programType == fileServer && vp) {
620 V_needsCallback(vp) = 0;
622 if (VInit >= 2 && V_BreakVolumeCallbacks) {
623 Log("VAttachVolume: Volume %u was changed externally; breaking callbacks\n", V_id(vp));
624 (*V_BreakVolumeCallbacks)(V_id(vp));
627 VUpdateVolume_r(ec,vp);
633 if (VolumeWriteable(vp) && V_dontSalvage(vp) == 0) {
634 /* This is a hack: by temporarily setting the incore
635 * dontSalvage flag ON, the volume will be put back on the
636 * Update list (with dontSalvage OFF again). It will then
637 * come back in N minutes with DONT_SALVAGE eventually
638 * set. This is the way that volumes that have never had
639 * it set get it set; or that volumes that have been
640 * offline without DONT SALVAGE having been set also
641 * eventually get it set */
642 V_dontSalvage(vp) = DONT_SALVAGE;
643 VAddToVolumeUpdateList_r(ec,vp);
651 Log("VOnline: volume %u (%s) attached and online\n",
652 V_id(vp), V_name(vp));
655 if (programType == volumeUtility) {
656 VUnlockPartition_r(partition);
664 private Volume *attach2(ec, path, header, partp, isbusy)
667 register struct VolumeHeader *header;
668 struct DiskPartition *partp;
674 vp = (Volume *) calloc(1, sizeof(Volume));
676 vp->specialStatus = (isbusy ? VBUSY : 0);
677 vp->device = partp->device;
678 vp->partition = partp;
679 IH_INIT(vp->vnodeIndex[vLarge].handle, partp->device, header->parent,
680 header->largeVnodeIndex);
681 IH_INIT(vp->vnodeIndex[vSmall].handle, partp->device, header->parent,
682 header->smallVnodeIndex);
683 IH_INIT(vp->diskDataHandle, partp->device, header->parent,
685 IH_INIT(vp->linkHandle, partp->device, header->parent,
687 vp->cacheCheck = ++VolumeCacheCheck;
688 vp->shuttingDown = 0;
689 vp->goingOffline = 0;
694 (void) ReadHeader(ec, V_diskDataHandle(vp),
695 (char *)&V_disk(vp), sizeof(V_disk(vp)),
696 VOLUMEINFOMAGIC, VOLUMEINFOVERSION);
699 struct IndexFileHeader iHead;
701 #if TRANSARC_VOL_STATS
703 * We just read in the diskstuff part of the header. If the detailed
704 * volume stats area has not yet been initialized, we should bzero the
705 * area and mark it as initialized.
707 if (! (V_stat_initialized(vp))) {
708 bzero((char *)(V_stat_area(vp)), VOL_STATS_BYTES);
709 V_stat_initialized(vp) = 1;
711 #endif /* TRANSARC_VOL_STATS */
713 (void) ReadHeader(ec, vp->vnodeIndex[vSmall].handle,
714 (char *)&iHead, sizeof(iHead),
715 SMALLINDEXMAGIC, SMALLINDEXVERSION);
719 struct IndexFileHeader iHead;
721 (void) ReadHeader(ec, vp->vnodeIndex[vLarge].handle,
722 (char *)&iHead, sizeof(iHead),
723 LARGEINDEXMAGIC, LARGEINDEXVERSION);
728 struct versionStamp stamp;
730 (void) ReadHeader(ec, V_linkHandle(vp),
731 (char *)&stamp, sizeof(stamp),
732 LINKTABLEMAGIC, LINKTABLEVERSION);
737 Log("VAttachVolume: Error attaching volume %s; volume needs salvage\n",
742 if (V_needsSalvaged(vp)) {
743 if (vp->specialStatus) vp->specialStatus = 0;
744 Log("VAttachVolume: volume salvage flag is ON for %s; volume needs salvage\n", path);
748 if (programType == fileServer) {
749 if (V_inUse(vp) && VolumeWriteable(vp)) {
750 if (!V_needsSalvaged(vp)) {
751 V_needsSalvaged(vp) = 1;
752 VUpdateVolume_r(ec,vp);
755 Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path);
759 if (V_destroyMe(vp) == DESTROY_ME) {
761 Log("VAttachVolume: volume %s is junk; it should be destroyed at next salvage\n", path);
767 AddVolumeToHashTable(vp, V_id(vp));
768 vp->nextVnodeUnique = V_uniquifier(vp);
769 vp->vnodeIndex[vSmall].bitmap = vp->vnodeIndex[vLarge].bitmap = NULL;
770 if (programType == fileServer && VolumeWriteable(vp)) {
772 for (i = 0; i<nVNODECLASSES; i++) {
783 if (programType == fileServer) {
784 if (vp->specialStatus) vp->specialStatus = 0;
785 if (V_blessed(vp) && V_inService(vp) && !V_needsSalvaged(vp)) {
787 V_offlineMessage(vp)[0] = '\0';
794 /* Attach an existing volume.
795 The volume also normally goes online at this time.
796 An offline volume must be reattached to make it go online.
800 VAttachVolume(ec,volumeId, mode)
808 retVal = VAttachVolume_r(ec, volumeId, mode);
815 VAttachVolume_r(ec,volumeId, mode)
821 GetVolumePath(ec,volumeId, &part, &name);
825 vp = VGetVolume_r(&error, volumeId);
827 assert(V_inUse(vp) == 0);
828 VDetachVolume_r(ec, vp);
832 return VAttachVolumeByName_r(ec, part, name, mode);
835 /* Increment a reference count to a volume, sans context swaps. Requires
836 * possibly reading the volume header in from the disk, since there's
837 * an invariant in the volume package that nUsers>0 ==> vp->header is valid.
839 * N.B. This call can fail if we can't read in the header!! In this case
840 * we still guarantee we won't context swap, but the ref count won't be
841 * incremented (otherwise we'd violate the invariant).
843 static int VHold_r(register Volume *vp)
847 if (vp->nUsers == 0 && !GetVolumeHeader(vp)) {
848 VolumeReplacements++;
849 ReadHeader(&error, V_diskDataHandle(vp),
850 (char *)&V_disk(vp), sizeof(V_disk(vp)),
851 VOLUMEINFOMAGIC, VOLUMEINFOVERSION);
852 if (error) return error;
858 static int VHold(register Volume *vp)
862 retVal = VHold_r(vp);
867 void VTakeOffline_r(register Volume *vp)
869 assert(vp->nUsers > 0);
870 assert(programType == fileServer);
871 vp->goingOffline = 1;
872 V_needsSalvaged(vp) = 1;
875 void VTakeOffline(register Volume *vp)
882 void VPutVolume_r(register Volume *vp)
884 assert(--vp->nUsers >= 0);
885 if (vp->nUsers == 0) {
886 ReleaseVolumeHeader(vp->header);
887 if (vp->goingOffline) {
889 assert(programType == fileServer);
890 vp->goingOffline = 0;
892 VUpdateVolume_r(&error, vp);
893 VCloseVolumeHandles_r(vp);
895 Log("VOffline: Volume %u (%s) is now offline",
896 V_id(vp), V_name(vp));
897 if (V_offlineMessage(vp)[0])
898 Log(" (%s)", V_offlineMessage(vp));
901 #ifdef AFS_PTHREAD_ENV
902 assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
903 #else /* AFS_PTHREAD_ENV */
904 LWP_NoYieldSignal(VPutVolume);
905 #endif /* AFS_PTHREAD_ENV */
907 if (vp->shuttingDown) {
908 VReleaseVolumeHandles_r(vp);
910 if (programType == fileServer)
911 #ifdef AFS_PTHREAD_ENV
912 assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
913 #else /* AFS_PTHREAD_ENV */
914 LWP_NoYieldSignal(VPutVolume);
915 #endif /* AFS_PTHREAD_ENV */
920 void VPutVolume(register Volume *vp)
927 /* Get a pointer to an attached volume. The pointer is returned regardless
928 of whether or not the volume is in service or on/off line. An error
929 code, however, is returned with an indication of the volume's status */
930 Volume *VGetVolume(ec,volumeId)
936 retVal = VGetVolume_r(ec,volumeId);
941 Volume *VGetVolume_r(ec,volumeId)
946 unsigned short V0=0, V1=0, V2=0, V3=0, V4=0, V5=0, V6=0, V7=0, V8=0, V9=0;
947 unsigned short V10=0, V11=0, V12=0, V13=0, V14=0, V15=0;
952 for (vp = VolumeHashTable[VOLUME_HASH(volumeId)];
953 vp && vp->hashid != volumeId; vp = vp->hashNext)
960 /* Until we have reached an initialization level of 2
961 we don't know whether this volume exists or not.
962 We can't sleep and retry later because before a volume
963 is attached, the caller tries to get it first. Just
964 return VOFFLINE and the caller can choose whether to
965 retry the command or not.*/
976 if (vp->nUsers == 0 && !GetVolumeHeader(vp)) {
978 VolumeReplacements++;
979 ReadHeader(ec, V_diskDataHandle(vp),
980 (char *)&V_disk(vp), sizeof(V_disk(vp)), VOLUMEINFOMAGIC,
984 /* Only log the error if it was a totally unexpected error. Simply
985 a missing inode is likely to be caused by the volume being deleted */
986 if (errno != ENXIO || LogLevel)
987 Log("Volume %u: couldn't reread volume header\n", vp->hashid);
994 if (vp->shuttingDown) {
1000 if (programType == fileServer) {
1002 if (vp->goingOffline) {
1004 #ifdef AFS_PTHREAD_ENV
1005 pthread_cond_wait(&vol_put_volume_cond, &vol_glock_mutex);
1006 #else /* AFS_PTHREAD_ENV */
1007 LWP_WaitProcess(VPutVolume);
1008 #endif /* AFS_PTHREAD_ENV */
1011 if (vp->specialStatus) {
1013 *ec = vp->specialStatus;
1015 else if (V_inService(vp)==0 || V_blessed(vp)==0) {
1019 else if (V_inUse(vp)==0) {
1030 /* if no error, bump nUsers */
1031 if (vp) vp->nUsers++;
1038 /* For both VForceOffline and VOffline, we close all relevant handles.
1039 * For VOffline, if we re-attach the volume, the files may possibly be
1040 * different than before.
1042 static void VReleaseVolumeHandles_r(Volume *vp)
1044 DFlushVolume(V_id(vp));
1045 VReleaseVnodeFiles_r(vp);
1047 /* Too time consuming and unnecessary for the volserver */
1048 if (programType != volumeUtility) {
1049 IH_CONDSYNC(vp->vnodeIndex[vLarge].handle);
1050 IH_CONDSYNC(vp->vnodeIndex[vSmall].handle);
1051 IH_CONDSYNC(vp->diskDataHandle);
1053 IH_CONDSYNC(vp->linkHandle);
1054 #endif /* AFS_NT40_ENV */
1057 IH_RELEASE(vp->vnodeIndex[vLarge].handle);
1058 IH_RELEASE(vp->vnodeIndex[vSmall].handle);
1059 IH_RELEASE(vp->diskDataHandle);
1060 IH_RELEASE(vp->linkHandle);
1063 /* Force the volume offline, set the salvage flag. No further references to
1064 * the volume through the volume package will be honored. */
1065 void VForceOffline_r(Volume *vp)
1070 strcpy(V_offlineMessage(vp), "Forced offline due to internal error: volume needs to be salvaged");
1071 Log("Volume %u forced offline: it needs salvaging!\n", V_id(vp));
1073 vp->goingOffline = 0;
1074 V_needsSalvaged(vp) = 1;
1075 VUpdateVolume_r(&error, vp);
1076 #ifdef AFS_PTHREAD_ENV
1077 assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
1078 #else /* AFS_PTHREAD_ENV */
1079 LWP_NoYieldSignal(VPutVolume);
1080 #endif /* AFS_PTHREAD_ENV */
1082 VReleaseVolumeHandles_r(vp);
1086 void VForceOffline(Volume *vp)
1089 VForceOffline_r(vp);
1093 /* The opposite of VAttachVolume. The volume header is written to disk, with
1094 the inUse bit turned off. A copy of the header is maintained in memory,
1095 however (which is why this is VOffline, not VDetach).
1097 void VOffline_r(Volume *vp, char *message)
1100 VolumeId vid = V_id(vp);
1101 assert(programType != volumeUtility);
1106 if (V_offlineMessage(vp)[0] == '\0')
1107 strncpy(V_offlineMessage(vp),message,
1108 sizeof(V_offlineMessage(vp)));
1109 V_offlineMessage(vp)[sizeof(V_offlineMessage(vp))-1] = '\0';
1110 vp->goingOffline = 1;
1112 vp = VGetVolume_r(&error, vid); /* Wait for it to go offline */
1113 if (vp) /* In case it was reattached... */
1117 void VOffline(Volume *vp, char *message)
1120 VOffline_r(vp, message);
1124 /* For VDetachVolume, we close all cached file descriptors, but keep
1125 * the Inode handles in case we need to read from a busy volume.
1127 static void VCloseVolumeHandles_r(Volume *vp)
1129 DFlushVolume(V_id(vp));
1130 VCloseVnodeFiles_r(vp);
1132 /* Too time consuming and unnecessary for the volserver */
1133 if (programType != volumeUtility) {
1134 IH_CONDSYNC(vp->vnodeIndex[vLarge].handle);
1135 IH_CONDSYNC(vp->vnodeIndex[vSmall].handle);
1136 IH_CONDSYNC(vp->diskDataHandle);
1138 IH_CONDSYNC(vp->linkHandle);
1139 #endif /* AFS_NT40_ENV */
1142 IH_REALLYCLOSE(vp->vnodeIndex[vLarge].handle);
1143 IH_REALLYCLOSE(vp->vnodeIndex[vSmall].handle);
1144 IH_REALLYCLOSE(vp->diskDataHandle);
1145 IH_REALLYCLOSE(vp->linkHandle);
1148 /* This gets used for the most part by utility routines that don't want
1149 * to keep all the volume headers around. Generally, the file server won't
1150 * call this routine, because then the offline message in the volume header
1151 * (or other information) will still be available to clients. For NAMEI, also
1152 * close the file handles.
1154 void VDetachVolume_r(Error *ec, Volume *vp)
1157 struct DiskPartition *tpartp;
1158 int notifyServer, useDone;
1160 *ec = 0; /* always "succeeds" */
1161 if (programType == volumeUtility) {
1162 notifyServer = vp->needsPutBack;
1163 useDone = (V_destroyMe(vp) == DESTROY_ME);
1165 tpartp = vp->partition;
1167 DeleteVolumeFromHashTable(vp);
1168 vp->shuttingDown = 1;
1170 /* Will be detached sometime in the future--this is OK since volume is offline */
1172 if (programType == volumeUtility && notifyServer) {
1173 /* Note: The server is not notified in the case of a bogus volume explicitly to
1174 make it possible to create a volume, do a partial restore, then abort the
1175 operation without ever putting the volume online. This is essential in the
1176 case of a volume move operation between two partitions on the same server. In
1177 that case, there would be two instances of the same volume, one of them bogus,
1178 which the file server would attempt to put on line */
1180 FSYNC_askfs(volume, tpartp->name, FSYNC_DONE, 0); /* don't put online */
1182 FSYNC_askfs(volume, tpartp->name, FSYNC_ON, 0); /* fs can use it again */
1183 /* Detaching it, so break all callbacks on it */
1184 if (V_BreakVolumeCallbacks) {
1185 Log("volume %u detached; breaking all call backs\n", volume);
1186 (*V_BreakVolumeCallbacks)(volume);
1192 void VDetachVolume(Error *ec, Volume *vp)
1195 VDetachVolume_r(ec, vp);
1200 int VAllocBitmapEntry_r(ec,vp,index)
1203 register struct vnodeIndex *index;
1205 register byte *bp,*ep;
1207 /* This test is probably redundant */
1208 if (!VolumeWriteable(vp)) {
1212 bp = index->bitmap + index->bitmapOffset;
1213 ep = index->bitmap + index->bitmapSize;
1215 if ((*(bit32 *)bp) != 0xffffffff) {
1217 index->bitmapOffset = bp - index->bitmap;
1220 o = ffs(~*bp)-1; /* ffs is documented in BSTRING(3) */
1222 return (bp - index->bitmap)*8 + o;
1224 bp += sizeof(bit32) /* i.e. 4 */;
1226 /* No bit map entry--must grow bitmap */
1228 realloc(index->bitmap, index->bitmapSize+VOLUME_BITMAP_GROWSIZE);
1231 bp += index->bitmapSize;
1232 bzero(bp, VOLUME_BITMAP_GROWSIZE);
1233 index->bitmapOffset = index->bitmapSize;
1234 index->bitmapSize += VOLUME_BITMAP_GROWSIZE;
1236 return index->bitmapOffset*8;
1239 int VAllocBitmapEntry(ec,vp,index)
1242 register struct vnodeIndex *index;
1246 retVal = VAllocBitmapEntry_r(ec,vp,index);
1251 void VFreeBitMapEntry_r(Error *ec, register struct vnodeIndex *index,
1254 unsigned int offset;
1256 offset = bitNumber>>3;
1257 if (offset >= index->bitmapSize) {
1261 if (offset < index->bitmapOffset)
1262 index->bitmapOffset = offset&~3; /* Truncate to nearest bit32 */
1263 *(index->bitmap + offset) &= ~(1 << (bitNumber & 0x7));
1266 void VFreeBitMapEntry(Error *ec, register struct vnodeIndex *index,
1270 VFreeBitMapEntry_r(ec, index, bitNumber);
1274 void VUpdateVolume_r(Error *ec,Volume *vp)
1277 if (programType == fileServer)
1278 V_uniquifier(vp) = (V_inUse(vp)? V_nextVnodeUnique(vp) + 200: V_nextVnodeUnique(vp));
1279 /*printf("Writing volume header for '%s'\n", V_name(vp));*/
1280 WriteVolumeHeader_r(ec, vp);
1283 "VUpdateVolume: error updating volume header, volume %u (%s)\n",
1284 V_id(vp), V_name(vp));
1285 VForceOffline_r(vp);
1289 void VUpdateVolume(Error *ec, Volume *vp)
1292 VUpdateVolume_r(ec, vp);
1296 void VSyncVolume_r(Error *ec, Volume *vp)
1299 VUpdateVolume_r(ec, vp);
1302 fdP = IH_OPEN(V_diskDataHandle(vp));
1303 assert(fdP != NULL);
1304 code = FDH_SYNC(fdP);
1310 void VSyncVolume(Error *ec, Volume *vp)
1313 VSyncVolume_r(ec, vp);
1317 static void FreeVolume(vp)
1323 for (i = 0; i<nVNODECLASSES; i++)
1324 if (vp->vnodeIndex[i].bitmap)
1325 free(vp->vnodeIndex[i].bitmap);
1326 FreeVolumeHeader(vp);
1327 DeleteVolumeFromHashTable(vp);
1331 static void GetBitmap(Error *ec, Volume *vp, VnodeClass class)
1333 StreamHandle_t *file;
1337 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
1338 struct vnodeIndex *vip = &vp->vnodeIndex[class];
1339 struct VnodeDiskObject *vnode;
1340 unsigned int unique = 0;
1345 fdP = IH_OPEN(vip->handle);
1346 assert (fdP != NULL);
1347 file = FDH_FDOPEN(fdP, "r");
1348 assert (file != NULL);
1349 vnode = (VnodeDiskObject *) malloc(vcp->diskSize);
1350 assert(vnode != NULL);
1351 size = OS_SIZE(fdP->fd_fd);
1353 nVnodes = (size <= vcp->diskSize? 0: size-vcp->diskSize)
1355 vip->bitmapSize = ((nVnodes/8)+10)/4*4; /* The 10 is a little extra so
1356 a few files can be created in this volume,
1357 the whole thing is rounded up to nearest 4
1358 bytes, because the bit map allocator likes
1360 vip->bitmap = (byte *) calloc(1, vip->bitmapSize);
1361 assert(vip->bitmap != NULL);
1362 vip->bitmapOffset = 0;
1363 if (STREAM_SEEK(file,vcp->diskSize,0) != -1) {
1365 for (bitNumber = 0; bitNumber < nVnodes+100; bitNumber++) {
1366 if (STREAM_READ(vnode, vcp->diskSize, 1, file) != 1)
1368 if (vnode->type != vNull) {
1369 if (vnode->vnodeMagic != vcp->magic) {
1370 Log("GetBitmap: addled vnode index in volume %s; volume needs salvage\n",
1375 *(vip->bitmap + (bitNumber>>3)) |= (1 << (bitNumber & 0x7));
1376 if (unique <= vnode->uniquifier)
1377 unique = vnode->uniquifier + 1;
1379 #ifndef AFS_PTHREAD_ENV
1380 if ((bitNumber & 0x00ff) == 0x0ff) { /* every 256 iterations */
1383 #endif /* !AFS_PTHREAD_ENV */
1386 if (vp->nextVnodeUnique < unique) {
1387 Log("GetBitmap: bad volume uniquifier for volume %s; volume needs salvage\n", V_name(vp));
1390 /* Paranoia, partly justified--I think fclose after fdopen
1391 * doesn't seem to close fd. In any event, the documentation
1392 * doesn't specify, so it's safer to close it twice.
1399 static void GetVolumePath(Error *ec, VolId volumeId, char **partitionp,
1402 static char partition[VMAXPATHLEN], name[VMAXPATHLEN];
1403 char path[VMAXPATHLEN];
1405 struct DiskPartition *dp;
1409 sprintf(&name[1],VFORMAT,volumeId);
1410 for (dp = DiskPartitionList; dp; dp = dp->next) {
1412 strcpy(path, VPartitionPath(dp));
1414 if (stat(path,&status) == 0) {
1415 strcpy(partition, dp->name);
1422 *partitionp = *namep = NULL;
1425 *partitionp = partition;
1435 return atoi(name+1);
1438 char *VolumeExternalName(volumeId)
1441 static char name[15];
1442 sprintf(name,VFORMAT,volumeId);
1446 #if TRANSARC_VOL_STATS
1447 #define OneDay (86400) /* 24 hours' worth of seconds */
1449 #define OneDay (24*60*60) /* 24 hours */
1450 #endif /* TRANSARC_VOL_STATS */
1452 #define Midnight(date) ((date-TimeZoneCorrection)/OneDay*OneDay+TimeZoneCorrection)
1454 /*------------------------------------------------------------------------
1455 * [export] VAdjustVolumeStatistics
1458 * If we've passed midnight, we need to update all the day use
1459 * statistics as well as zeroing the detailed volume statistics
1460 * (if we are implementing them).
1463 * vp : Pointer to the volume structure describing the lucky
1464 * volume being considered for update.
1470 * Nothing interesting.
1474 *------------------------------------------------------------------------*/
/*
 * Summary of the visible logic: when more than OneDay seconds separate the
 * current approximate time from V_dayUseDate(vp), the V_weekUse(vp) history
 * is shifted, the current V_dayUse(vp) count is stored into it, and
 * V_dayUseDate(vp) is moved to Midnight(now).
 *
 * NOTE(review): some lines of this function are not visible in this excerpt
 * (declarations of ndays and i, the reset of V_dayUse, return statements).
 */
1476 VAdjustVolumeStatistics_r(vp)
1477 register Volume *vp;
1479 { /*VAdjustVolumeStatistics*/
1481 unsigned int now = FT_ApproxTime();
1483 if (now - V_dayUseDate(vp) > OneDay) {
/* Number of whole days elapsed since the recorded day-use date. */
1486 ndays = (now - V_dayUseDate(vp)) / OneDay;
/* Shift existing history up by ndays slots, working from index 6 downward
 * so no source slot is overwritten before it is read; entries that would
 * land beyond index 6 are discarded. */
1487 for (i = 6; i>ndays-1; i--)
1488 V_weekUse(vp)[i] = V_weekUse(vp)[i-ndays];
/* Zero slots 0 .. ndays-2 (capped at the 7 slots touched here). */
1489 for (i = 0; i<ndays-1 && i<7; i++)
1490 V_weekUse(vp)[i] = 0;
/* Store the accumulated day count in slot ndays-1.
 * NOTE(review): ndays-1 exceeds 6 when more than 7 days have elapsed; a
 * bounds guard may sit on the line missing just above -- confirm. */
1492 V_weekUse(vp)[ndays-1] = V_dayUse(vp);
/* Restart the collection period at the most recent (zone-corrected) day
 * boundary. */
1494 V_dayUseDate(vp) = Midnight(now);
1496 #if TRANSARC_VOL_STATS
1498 * All we need to do is bzero the entire VOL_STATS_BYTES of
1499 * the detailed volume statistics area.
1501 bzero((char *)(V_stat_area(vp)), VOL_STATS_BYTES);
1502 #endif /* TRANSARC_VOL_STATS */
1503 } /*It's been more than a day of collection*/
1505 #if TRANSARC_VOL_STATS
1507 * Always return happily.
1510 #endif /* TRANSARC_VOL_STATS */
1512 } /*VAdjustVolumeStatistics*/
/*
 * VAdjustVolumeStatistics: wrapper that forwards vp to
 * VAdjustVolumeStatistics_r().
 *
 * NOTE(review): the lines around the call are not visible in this excerpt;
 * presumably they take and release the volume package lock and propagate
 * the return value -- confirm.
 */
1514 VAdjustVolumeStatistics(vp)
1515 register Volume *vp;
1519 VAdjustVolumeStatistics_r(vp);
/*
 * VBumpVolumeUsage_r: count one more use of volume vp.
 *
 * First rolls the daily statistics forward (VAdjustVolumeStatistics_r) if
 * more than OneDay seconds have passed since V_dayUseDate(vp), then
 * increments V_dayUse(vp) and periodically writes the volume header out.
 */
1524 void VBumpVolumeUsage_r(register Volume *vp)
1526 unsigned int now = FT_ApproxTime();
1527 if (now - V_dayUseDate(vp) > OneDay)
1528 VAdjustVolumeStatistics_r(vp);
1530 * Save the volume header image to disk after every 128 bumps to dayUse.
/* The test uses the value before the post-increment, so the update fires
 * when the previous count was a multiple of 128 (including 0).
 * NOTE(review): error is declared on a line not visible here, and no check
 * of it after VUpdateVolume_r() is visible -- confirm this is intended. */
1532 if ((V_dayUse(vp)++ & 127) == 0) {
1534 VUpdateVolume_r(&error, vp);
/*
 * VBumpVolumeUsage: wrapper that forwards vp to VBumpVolumeUsage_r().
 *
 * NOTE(review): the lines around the call are not visible in this excerpt;
 * presumably they take and release the volume package lock -- confirm.
 */
1538 void VBumpVolumeUsage(register Volume *vp)
1541 VBumpVolumeUsage_r(vp);
/*
 * VSetDiskUsage_r: refresh disk usage via VResetDiskUsage_r() and, on every
 * third call, run an additional step.
 *
 * NOTE(review): the initialization-level check described in the NOTE below
 * and the body of the every-third-call branch are on lines not visible in
 * this excerpt.
 */
1545 void VSetDiskUsage_r(void)
/* Call counter; static, so it persists across calls. */
1547 static int FifteenMinuteCounter = 0;
1550 /* NOTE: Don't attempt to access the partitions list until the
1551 initialization level indicates that all volumes are attached,
1552 which implies that all partitions are initialized. */
1553 #ifdef AFS_PTHREAD_ENV
1555 #else /* AFS_PTHREAD_ENV */
1557 #endif /* AFS_PTHREAD_ENV */
1560 VResetDiskUsage_r();
/* Every third call: reset the counter, then do the periodic work.  The
 * counter's name suggests this function is called every five minutes --
 * presumably; confirm against the caller. */
1561 if (++FifteenMinuteCounter == 3) {
1562 FifteenMinuteCounter = 0;
/*
 * VSetDiskUsage: entry point taking no arguments and returning nothing.
 *
 * NOTE(review): the body is not visible in this excerpt; by analogy with
 * the other X / X_r pairs in this file it presumably calls
 * VSetDiskUsage_r() under the volume package lock -- confirm.
 */
1567 void VSetDiskUsage(void)
1574 /* The number of minutes that a volume hasn't been updated before the
1575 * "Dont salvage" flag in the volume header will be turned on */
/* NOTE(review): the value is 10*60 and VScanUpdateList compares it with a
 * difference of two FT_ApproxTime() values; if those are in seconds this is
 * ten minutes expressed in seconds, not a count of minutes -- confirm. */
1576 #define SALVAGE_INTERVAL (10*60)
/* Growable array of volume ids appended to by VAddToVolumeUpdateList_r and
 * compacted by VScanUpdateList.  nUpdatedVolumes is the number of entries
 * in use; updateSize is the allocated capacity, which is grown in steps of
 * UPDATE_LIST_SIZE entries. */
1578 static VolumeId *UpdateList; /* Pointer to array of Volume ID's */
1579 static int nUpdatedVolumes; /* Updated with entry in UpdateList, salvage after crash flag on */
1580 static int updateSize; /* number of entries possible */
1581 #define UPDATE_LIST_SIZE 100 /* size increment */
/*
 * VAddToVolumeUpdateList_r: record that volume vp has just been modified.
 *
 * Stamps vp->updateTime, clears V_dontSalvage(vp) and syncs the volume
 * (errors reported through *ec by VSyncVolume_r), then appends V_id(vp) to
 * UpdateList, growing the array when needed.
 *
 * NOTE(review): several lines are not visible in this excerpt, including
 * the statement controlled by the first if and the conditions selecting
 * the malloc and realloc paths.
 */
1583 void VAddToVolumeUpdateList_r(Error *ec, Volume *vp)
1586 vp->updateTime = FT_ApproxTime();
/* Flag already clear: the controlled statement is on a missing line
 * (presumably an early return, since the volume would already be on the
 * list -- confirm). */
1587 if (V_dontSalvage(vp) == 0)
1589 V_dontSalvage(vp) = 0;
1590 VSyncVolume_r(ec, vp);
/* Initial allocation of the list. */
1594 updateSize = UPDATE_LIST_SIZE;
1595 UpdateList = (VolumeId *) malloc(sizeof (VolumeId) * updateSize);
/* List full: extend capacity by one increment.
 * NOTE(review): no check of the malloc()/realloc() result is visible, and
 * assigning realloc()'s result straight to UpdateList loses the old block
 * if it fails -- confirm whether a check exists on the missing lines. */
1597 if (nUpdatedVolumes == updateSize) {
1598 updateSize += UPDATE_LIST_SIZE;
1599 UpdateList = (VolumeId *) realloc(UpdateList, sizeof (VolumeId) * updateSize);
1602 UpdateList[nUpdatedVolumes++] = V_id(vp);
/*
 * VScanUpdateList: walk UpdateList and, for each volume that has a single
 * user and was last updated more than SALVAGE_INTERVAL ago, set
 * V_dontSalvage to DONT_SALVAGE and write the volume header.  The list is
 * compacted in place while scanning.
 *
 * NOTE(review): the first branch of the if/else chain, the places where gap
 * is incremented, the release of vp and the declaration of error are on
 * lines not visible in this excerpt.
 */
1605 static void VScanUpdateList() {
1606 register int i, gap;
1607 register Volume *vp;
1609 afs_int32 now = FT_ApproxTime();
1610 /* Be careful with this code, since it works with interleaved calls to AddToVolumeUpdateList */
1611 for (i = gap = 0; i<nUpdatedVolumes; i++) {
/* Copy entry i down by gap slots (closing holes left by removed entries)
 * and look up the volume with that same id. */
1612 vp = VGetVolume_r(&error, UpdateList[i-gap] = UpdateList[i]);
/* nUsers == 1: presumably the reference just obtained above is the only
 * one, i.e. nobody else is using the volume -- confirm. */
1615 } else if (vp->nUsers == 1 && now - vp->updateTime > SALVAGE_INTERVAL) {
1616 V_dontSalvage(vp) = DONT_SALVAGE;
1617 VUpdateVolume_r(&error, vp); /* No need to fsync--not critical */
1622 #ifndef AFS_PTHREAD_ENV
1624 #endif /* !AFS_PTHREAD_ENV */
/* Drop the entries that were squeezed out during the scan. */
1626 nUpdatedVolumes -= gap;
1629 /***************************************************/
1630 /* Add on routines to manage a volume header cache */
1631 /***************************************************/
/*
 * Entry point into the circular, doubly linked (next/prev) list of volume
 * headers that are not currently in use.  GetVolumeHeader takes
 * volumeLRU->prev as the least recently used entry and sets this pointer
 * to 0 when the list becomes empty.
 */
1633 static struct volHeader *volumeLRU;
1635 /* Allocate a bunch of headers; string them together */
/*
 * howMany headers are allocated in one calloc() block and each is passed to
 * ReleaseVolumeHeader().  calloc() zero-fills, so every header starts with
 * next == 0, which is the state ReleaseVolumeHeader requires before it will
 * link a header in.
 *
 * NOTE(review): the statement controlled by the programType test and the
 * loop header are on lines not visible here (presumably an early return for
 * non-fileserver programs and a loop of howMany iterations -- confirm).  No
 * check of the calloc() result is visible.
 */
1636 static void InitLRU(howMany)
1639 register struct volHeader *hp;
1640 if (programType != fileServer)
1642 hp = (struct volHeader *)(calloc(howMany, sizeof(struct volHeader)));
1644 ReleaseVolumeHeader(hp++);
1647 /* Get a volume header from the LRU list; update the old one if necessary */
1648 /* Returns 1 if there was already a header, which is removed from the LRU list */
/*
 * Outline of the visible logic:
 *  - non-fileserver programs get a freshly calloc()ed header;
 *  - otherwise, if vp already has a header it is reused; else the least
 *    recently used header (volumeLRU->prev) is taken, or a new one is
 *    allocated when none is available;
 *  - a recycled header still marked inUse is written back for its previous
 *    owner (hd->back) and detached from it;
 *  - finally hd is unlinked from the LRU chain.
 *
 * NOTE(review): many lines of this function are not visible in this excerpt
 * (declarations of old and error, the else/brace structure, the linking of
 * hd to vp and the return).  The branch conditions stated above are
 * inferred from the visible statements and existing comments -- confirm
 * against the full source.
 */
1649 static int GetVolumeHeader(vp)
1650 register Volume *vp;
1653 register struct volHeader *hd;
/* Used on a line not visible here; presumably limits the Log() below to a
 * single occurrence -- confirm. */
1655 static int everLogged = 0;
1657 old = (vp->header != 0); /* old == volume already has a header */
1658 if (programType != fileServer) {
1660 hd = (struct volHeader *) calloc(1, sizeof(*vp->header));
/* Keep volumeLRU from pointing at a header that is about to leave the
 * chain. */
1669 if (volumeLRU == hd)
1670 volumeLRU = hd->next;
/* A header being reused by its own volume must point back at that volume. */
1671 assert(hd->back == vp);
1675 hd = volumeLRU->prev; /* not currently in use and least recently used */
1677 hd = (struct volHeader *) calloc(1, sizeof(*vp->header));
1678 hd->prev = hd->next = hd; /* make it look like single elt LRU */
1680 Log("****Allocated more volume headers, probably leak****\n");
/* Flush the recycled header for its previous owner before reusing it. */
1685 if (hd->diskstuff.inUse) {
1686 WriteVolumeHeader_r(&error, hd->back);
1687 /* Ignore errors; catch them later */
/* The previous owner no longer has a cached header. */
1689 hd->back->header = 0;
1694 if (hd->next) { /* hd->next != 0 --> in LRU chain (we zero it later) */
1695 hd->prev->next = hd->next; /* pull hd out of LRU list */
1696 hd->next->prev = hd->prev; /* if hd only element, this is noop */
/* next == prev == 0 marks the header as "not in the LRU chain". */
1698 hd->next = hd->prev = 0;
1699 /* if not in LRU chain, next test won't be true */
1700 if (hd == volumeLRU) /* last header item, turn into empty list */
1701 volumeLRU = (struct volHeader *) 0;
1706 /* Put it at the top of the LRU chain */
/*
 * hd is linked into the circular list anchored at volumeLRU.  A null hd, or
 * one whose next pointer is already non-zero (already in the chain), is
 * left untouched.
 *
 * NOTE(review): the statements controlled by the two guards, the test that
 * distinguishes the empty-list case and any update of volumeLRU itself are
 * on lines not visible in this excerpt.
 */
1707 static void ReleaseVolumeHeader(hd)
1708 register struct volHeader *hd;
1710 if (programType != fileServer)
1712 if (!hd || hd->next) /* no header, or header already released */
/* Single-element ring: hd points to itself in both directions (presumably
 * the empty-list case -- confirm). */
1715 hd->next = hd->prev = hd;
/* Otherwise splice hd in between volumeLRU->prev and volumeLRU. */
1717 hd->prev = volumeLRU->prev;
1718 hd->next = volumeLRU;
1719 hd->prev->next = hd->next->prev = hd;
/*
 * FreeVolumeHeader: give up the header attached to vp.  In the file server
 * the header is handed to ReleaseVolumeHeader() for reuse.
 *
 * NOTE(review): the handling of a null header, the non-fileserver branch
 * and the clearing of vp->header are on lines not visible in this excerpt.
 */
1724 static void FreeVolumeHeader(vp)
1725 register Volume *vp;
1727 register struct volHeader *hd = vp->header;
1730 if (programType == fileServer) {
1731 ReleaseVolumeHeader(hd);
1741 /***************************************************/
1742 /* Routines to add volume to hash chain, delete it */
1743 /***************************************************/
/*
 * AddVolumeToHashTable: record hashid in vp and push vp onto the front of
 * the VolumeHashTable chain selected by VOLUME_HASH(hashid).  Also assigns
 * vp->vnodeHashOffset from VolumeHashOffset_r().
 *
 * NOTE(review): the declaration of hashid is on a line not visible in this
 * excerpt.
 */
1745 static void AddVolumeToHashTable(vp, hashid)
1746 register Volume *vp;
1748 int hash = VOLUME_HASH(hashid);
1749 vp->hashid = hashid;
/* Insert at the head of the bucket's singly linked chain. */
1750 vp->hashNext = VolumeHashTable[hash];
1751 VolumeHashTable[hash] = vp;
1752 vp->vnodeHashOffset = VolumeHashOffset_r();
/*
 * DeleteVolumeFromHashTable: unlink vp from the VolumeHashTable chain
 * selected by VOLUME_HASH(vp->hashid).
 *
 * NOTE(review): the else keyword, an empty-bucket check and the statement
 * controlled by the "not found" test are on lines not visible in this
 * excerpt.
 */
1755 static void DeleteVolumeFromHashTable(vp)
1756 register Volume *vp;
1758 int hash = VOLUME_HASH(vp->hashid);
/* vp is the first entry of its bucket: advance the bucket head. */
1759 if (VolumeHashTable[hash] == vp)
1760 VolumeHashTable[hash] = vp->hashNext;
1762 Volume *tvp = VolumeHashTable[hash];
/* Walk the chain until tvp is vp's predecessor or the last entry. */
1765 while (tvp->hashNext && tvp->hashNext != vp)
1766 tvp = tvp->hashNext;
/* Reached the end without finding vp (presumably returns here -- confirm). */
1767 if (tvp->hashNext == NULL)
/* Bypass vp in the chain. */
1769 tvp->hashNext = vp->hashNext;
/*
 * VPrintCacheStats_r: write three lines to the log via Log(): counters for
 * the large vnode cache, counters for the small vnode cache (cacheSize,
 * allocs, gets, reads, writes from the matching VnodeClassInfo entry), and
 * the volume header cache counters VolumeCacheSize, VolumeGets and
 * VolumeReplacements.
 */
1774 void VPrintCacheStats_r(void)
1776 register struct VnodeClassInfo *vcp;
1777 vcp = &VnodeClassInfo[vLarge];
1778 Log("Large vnode cache, %d entries, %d allocs, %d gets (%d reads), %d writes\n",
1779 vcp->cacheSize, vcp->allocs, vcp->gets, vcp->reads, vcp->writes);
1780 vcp = &VnodeClassInfo[vSmall];
1781 Log("Small vnode cache,%d entries, %d allocs, %d gets (%d reads), %d writes\n",
1782 vcp->cacheSize, vcp->allocs, vcp->gets, vcp->reads, vcp->writes);
1783 Log("Volume header cache, %d entries, %d gets, %d replacements\n",
1784 VolumeCacheSize, VolumeGets, VolumeReplacements);
1787 void VPrintCacheStats(void)
1790 VPrintCacheStats_r();