1 /* 1/1/89: NB: this stuff is all going to be replaced. Don't take it too seriously */
7 * (C) COPYRIGHT IBM CORPORATION 1987
8 * LICENSED MATERIALS - PROPERTY OF IBM
14 Institution: The Information Technology Center, Carnegie-Mellon University
18 #include <afs/param.h>
20 #include <afs/afsint.h>
23 #include <sys/param.h>
24 #if !defined(AFS_SGI_ENV)
27 #else /* AFS_OSF_ENV */
28 #ifdef AFS_VFSINCL_ENV
31 #include <sys/fs/ufs_fs.h>
35 #else /* AFS_VFSINCL_ENV */
36 #if !defined(AFS_AIX_ENV) && !defined(AFS_LINUX20_ENV)
39 #endif /* AFS_VFSINCL_ENV */
40 #endif /* AFS_OSF_ENV */
41 #endif /* AFS_SGI_ENV */
42 #endif /* AFS_NT40_ENV */
60 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
62 #include <sys/mnttab.h>
63 #include <sys/mntent.h>
69 #if defined(AFS_SGI_ENV)
72 #ifndef AFS_SGI_XFS_IOPS_ENV
73 #define ROOTINO EFS_ROOTINO
76 #include <sys/fs/efs.h>
78 #include "../sgiefs/efs.h" /* until 5.1 release */
81 #ifndef AFS_LINUX20_ENV
82 #include <fstab.h> /* Need to find in libc 5, present in libc 6 */
85 #endif /* AFS_SGI_ENV */
87 #endif /* AFS_HPUX_ENV */
91 #include <netinet/in.h>
96 #endif /* ITIMER_REAL */
97 #endif /* AFS_NT40_ENV */
98 #if defined(AFS_SUN5_ENV) || defined(AFS_NT40_ENV)
105 #include <afs/errors.h>
108 #include <afs/afssyscalls.h>
111 #include <afs/afsutil.h>
116 #include "partition.h"
117 #ifdef AFS_PTHREAD_ENV
119 #else /* AFS_PTHREAD_ENV */
120 #include "afs/assert.h"
121 #endif /* AFS_PTHREAD_ENV */
124 #if !defined(AFS_NT40_ENV) && !defined(AFS_NAMEI_ENV)
125 #include <afs/osi_inode.h>
131 #ifdef AFS_PTHREAD_ENV
132 pthread_mutex_t vol_glock_mutex;
133 pthread_mutex_t vol_attach_mutex;
134 pthread_cond_t vol_put_volume_cond;
135 pthread_cond_t vol_sleep_cond;
136 #endif /* AFS_PTHREAD_ENV */
139 extern void *calloc(), *realloc();
142 /* Forward declarations */
143 static Volume *attach2();
144 static void FreeVolume();
145 static void VScanUpdateList();
146 static void InitLRU();
147 static int GetVolumeHeader();
148 static void ReleaseVolumeHeader();
149 static void FreeVolumeHeader();
150 static void AddVolumeToHashTable();
151 static void DeleteVolumeFromHashTable();
152 static int VHold(Volume *vp);
153 static int VHold_r(Volume *vp);
154 static void GetBitmap(Error *ec, Volume *vp, VnodeClass class);
155 static void GetVolumePath(Error *ec, VolId volumeId, char **partitionp,
157 static void VReleaseVolumeHandles_r(Volume *vp);
158 static void VCloseVolumeHandles_r(Volume *vp);
160 int LogLevel; /* Vice loglevel--not defined as extern so that it will be
161 defined when not linked with vice, XXXX */
162 ProgramType programType; /* The type of program using the package */
165 #define VOLUME_BITMAP_GROWSIZE 16 /* bytes, => 128vnodes */
166 /* Must be a multiple of 4 (1 word) !!*/
/* Hash table of Volume pointers.  VOLUME_HASH keeps the low bits of the id by
 * masking with (VOLUME_HASH_TABLE_SIZE-1), which is why the table size has to
 * be a power of two. */
167 #define VOLUME_HASH_TABLE_SIZE 128 /* Must be a power of 2!! */
168 #define VOLUME_HASH(volumeId) (volumeId&(VOLUME_HASH_TABLE_SIZE-1))
169 private Volume *VolumeHashTable[VOLUME_HASH_TABLE_SIZE];
172 /* This macro is used where an ffs() call does not exist. Was in util/ffs.c */
175 afs_int32 ffs_tmp = x; \
176 if (ffs_tmp == 0) return(-1); \
178 for (ffs_i = 1;; ffs_i++) { \
179 if (ffs_tmp & 1) return(ffs_i); \
180 else ffs_tmp >>= 1; \
183 #endif /* !AFS_HAVE_FFS */
185 struct Lock vol_listLock; /* Lock obtained when listing volumes: prevents a volume from being missed if the volume is attached during a list volumes */
187 extern struct Lock FSYNC_handler_lock;
189 Volume *VAttachVolumeByName();
190 Volume *VAttachVolumeByName_r();
192 static int TimeZoneCorrection; /* Number of seconds west of GMT */
194 /* Common message used when the volume goes off line */
195 char *VSalvageMessage =
196 "Files in this volume are currently unavailable; call operations";
198 int VInit; /* 0 - uninitialized,
199 1 - initialized but not all volumes have been attached,
200 2 - initialized and all volumes have been attached,
201 3 - initialized, all volumes have been attached, and
202 VConnectFS() has completed. */
205 int VolumeCacheCheck; /* Incremented everytime a volume goes on line--
206 * used to stamp volume headers and in-core
207 * vnodes. When the volume goes on-line the
208 * vnode will be invalidated */
210 int VolumeCacheSize = 200, VolumeGets=0, VolumeReplacements=0, Vlooks = 0;
/* VInitVolumePackage: one-time initialization of the volume package.
 * Some lines of this function are missing from this listing; the notes below
 * describe only the statements that are visible.
 *  - Under AFS_PTHREAD_ENV, initializes vol_glock_mutex, vol_attach_mutex,
 *    vol_put_volume_cond and vol_sleep_cond; every call is asserted to
 *    return 0.
 *  - Initializes vol_listLock and FSYNC_handler_lock, seeds random() from
 *    time(0), and sets TimeZoneCorrection to tz_minuteswest*60.
 *  - Raises VolumeCacheSize to volcache when volcache is larger, then hands
 *    VolumeCacheSize to InitLRU().
 *  - Calls VInitVnodes() for vLarge and vSmall, and stores the result of
 *    VAttachPartitions() in errors.
 *  - Walks DiskPartitionList (the loop follows a programType == fileServer
 *    test) and calls VAttachVolumeByName() for each directory entry whose
 *    last '.'-suffix equals VHDREXT, counting attached and unattached
 *    volumes and logging the totals per partition.
 *  - Sets VInit to 2.
 *  - For a volumeUtility with connect set, a failure path logs "Unable to
 *    connect to file server; aborted"; the call being tested is not visible.
 * NOTE(review): the pthread init calls are made inside assert(); confirm that
 * this assert is never compiled out, otherwise the calls would disappear.
 */
213 int VInitVolumePackage(ProgramType pt, int nLargeVnodes, int nSmallVnodes,
214 int connect, int volcache)
216 int errors = 0; /* Number of errors while finding vice partitions. */
222 #ifdef AFS_PTHREAD_ENV
223 assert(pthread_mutex_init(&vol_glock_mutex, NULL) == 0);
224 assert(pthread_mutex_init(&vol_attach_mutex, NULL) == 0);
225 assert(pthread_cond_init(&vol_put_volume_cond, NULL) == 0);
226 assert(pthread_cond_init(&vol_sleep_cond, NULL) == 0);
227 #else /* AFS_PTHREAD_ENV */
229 #endif /* AFS_PTHREAD_ENV */
230 Lock_Init(&vol_listLock);
231 Lock_Init(&FSYNC_handler_lock);
232 srandom(time(0)); /* For VGetVolumeInfo */
233 gettimeofday(&tv, &tz);
234 TimeZoneCorrection = tz.tz_minuteswest*60;
236 /* Ok, we have done enough initialization that fileserver can
237 * start accepting calls, even though the volumes may not be
238 * available just yet.
242 if (programType == fileServer) {
243 /* File server or "stand" */
247 if (volcache > VolumeCacheSize)
248 VolumeCacheSize = volcache;
249 InitLRU(VolumeCacheSize);
251 VInitVnodes(vLarge, nLargeVnodes);
252 VInitVnodes(vSmall, nSmallVnodes);
255 errors = VAttachPartitions();
259 if (programType == fileServer) {
262 struct DiskPartition *diskP;
265 /* Attach all the volumes in this partition */
266 for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
267 int nAttached = 0, nUnattached = 0;
268 dirp = opendir(VPartitionPath(diskP));
270 while (dp = readdir(dirp)) {
272 p = strrchr(dp->d_name, '.');
273 if (p != NULL && strcmp(p, VHDREXT) == 0) {
276 vp = VAttachVolumeByName(&error, diskP->name, dp->d_name,
278 (*(vp?&nAttached:&nUnattached))++;
279 if (error == VOFFLINE)
280 Log("Volume %u stays offline (/vice/offline/%s exists)\n",
281 VolumeNumber(dp->d_name), dp->d_name);
287 Log("Partition %s: attached %d volumes; %d volumes not attached\n",
288 diskP->name, nAttached, nUnattached);
293 VInit = 2; /* Initialized, and all volumes have been attached */
294 if (programType == volumeUtility && connect) {
296 Log("Unable to connect to file server; aborted\n");
303 /* This must be called by any volume utility which needs to run while the
304 file server is also running. This is separated from VInitVolumePackage so
305 that a utility can fork--and each of the children can independently
306 initialize communication with the file server */
311 retVal = VConnectFS_r();
/* VConnectFS_r: requires VInit == 2 and a volumeUtility program (both
 * asserted), then calls FSYNC_clientInit() and keeps its result in rc.
 * What is done with rc afterwards is not visible in this listing. */
316 int VConnectFS_r(void)
319 assert(VInit == 2 && programType == volumeUtility);
320 rc = FSYNC_clientInit();
/* VDisconnectFS_r: may only be called by a volumeUtility program; the assert
 * below enforces that.  The rest of the body is not visible in this listing. */
326 void VDisconnectFS_r(void) {
327 assert(programType == volumeUtility);
332 void VDisconnectFS(void) {
/* VShutdown_r: takes the volumes found in VolumeHashTable offline.
 * Every hash chain is walked; the successor np is chosen before
 * VOffline_r(vp, "File server was shut down") is called, and the walk then
 * continues from np.  The calls that assign `code` are not visible in this
 * listing.  A log line is written before and after the pass. */
338 void VShutdown_r(void)
341 register Volume *vp, *np;
342 register afs_int32 code;
344 Log("VShutdown: shutting down on-line volumes...\n");
345 for (i=0; i<VOLUME_HASH_TABLE_SIZE; i++) {
346 /* try to hold first volume in the hash table */
347 for(vp = VolumeHashTable[i]; vp; vp=vp->hashNext) {
349 if (code == 0) break; /* got it */
350 /* otherwise we go around again, trying another volume */
353 /* first compute np before releasing vp, in case vp disappears
354 * after releasing. Hold it, so it doesn't disappear. If we
355 * can't hold it, try the next one in the chain. Invariant
356 * at the top of this loop is that vp is held (has extra ref count).
358 for(np=vp->hashNext; np; np=np->hashNext) {
360 if (code == 0) break; /* got it */
362 /* next, take the volume offline (drops reference count) */
363 VOffline_r(vp, "File server was shut down");
364 vp = np; /* next guy to try */
367 Log("VShutdown: complete.\n");
/* ReadHeader: seeks to offset 0 of the open file fdP (presumably opened from
 * handle h on a line not visible here), reads `size` bytes into `to`, and
 * validates the versionStamp at the start of that buffer.
 *   ec      - error output; the assignments to it are not visible here
 *   to/size - destination buffer and the exact byte count that must be read
 *   magic   - value the stamp's magic field has to equal
 *   version - expected stamp version; passing 0 skips the version check
 * Both visible failure paths call FDH_REALLYCLOSE(fdP).
 */
378 static void ReadHeader(Error *ec, IHandle_t *h, char *to, int size,
379 int magic, int version)
381 struct versionStamp *vsn;
391 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
393 FDH_REALLYCLOSE(fdP);
396 vsn = (struct versionStamp *) to;
397 if (FDH_READ(fdP, to, size) != size || vsn->magic != magic) {
399 FDH_REALLYCLOSE(fdP);
404 /* Check is conditional, in case caller wants to inspect version himself */
405 if (version && vsn->version != version) {
410 /* VolumeHeaderToDisk
411 * Allows for storing 64 bit inode numbers in on-disk volume header
/* Fills the on-disk form *dh from the in-memory header *h.
 * *dh is zeroed first, so every field that is not assigned below stays 0.
 * Under AFS_64BIT_IOPS_ENV each of the four inode fields (volumeInfo,
 * smallVnodeIndex, largeVnodeIndex, linkTable) is split into a _lo and a _hi
 * 32-bit half; the last four assignments store the _lo halves only
 * (presumably the non-64-bit branch -- its #else line is not visible). */
414 void VolumeHeaderToDisk(VolumeDiskHeader_t *dh, VolumeHeader_t *h)
417 bzero((char*)dh, sizeof(VolumeDiskHeader_t));
418 dh->stamp = h->stamp;
420 dh->parent = h->parent;
422 #ifdef AFS_64BIT_IOPS_ENV
423 dh->volumeInfo_lo = (afs_int32) h->volumeInfo & 0xffffffff;
424 dh->volumeInfo_hi = (afs_int32) (h->volumeInfo >> 32) & 0xffffffff;
425 dh->smallVnodeIndex_lo = (afs_int32) h->smallVnodeIndex & 0xffffffff;
426 dh->smallVnodeIndex_hi = (afs_int32) (h->smallVnodeIndex >> 32) & 0xffffffff;
427 dh->largeVnodeIndex_lo = (afs_int32) h->largeVnodeIndex & 0xffffffff;
428 dh->largeVnodeIndex_hi = (afs_int32) (h->largeVnodeIndex >> 32) & 0xffffffff;
429 dh->linkTable_lo = (afs_int32) h->linkTable & 0xffffffff;
430 dh->linkTable_hi = (afs_int32) (h->linkTable >> 32) & 0xffffffff;
432 dh->volumeInfo_lo = h->volumeInfo;
433 dh->smallVnodeIndex_lo = h->smallVnodeIndex;
434 dh->largeVnodeIndex_lo = h->largeVnodeIndex;
435 dh->linkTable_lo = h->linkTable;
439 /* DiskToVolumeHeader
440 * Reads volume header file from disk, converting 64 bit inodes
441 * if required. Makes the assumption that AFS has *always*
442 * zero'd the volume header file so that high parts of inode
443 * numbers are 0 in older (SGI EFS) volume header files.
/* Fills the in-memory header *h from the on-disk form *dh.
 * *h is zeroed first.  Under AFS_64BIT_IOPS_ENV each inode number is rebuilt
 * as  _lo | ((Inode)_hi << 32); the last four assignments copy the _lo half
 * alone (presumably the non-64-bit branch -- its #else line is not visible).
 * NOTE(review): if the _lo fields are signed 32-bit integers, a value with
 * the top bit set is sign-extended when it is widened for the OR and would
 * overwrite the _hi half with ones -- confirm the field types in
 * VolumeDiskHeader_t. */
445 void DiskToVolumeHeader(VolumeHeader_t *h, VolumeDiskHeader_t *dh)
447 bzero((char*)h, sizeof(VolumeHeader_t));
448 h->stamp = dh->stamp;
450 h->parent = dh->parent;
452 #ifdef AFS_64BIT_IOPS_ENV
453 h->volumeInfo = dh->volumeInfo_lo | ((Inode)dh->volumeInfo_hi << 32);
455 h->smallVnodeIndex = dh->smallVnodeIndex_lo |
456 ((Inode)dh->smallVnodeIndex_hi << 32);
458 h->largeVnodeIndex = dh->largeVnodeIndex_lo |
459 ((Inode)dh->largeVnodeIndex_hi << 32);
460 h->linkTable = dh->linkTable_lo |
461 ((Inode)dh->linkTable_hi << 32);
463 h->volumeInfo = dh->volumeInfo_lo;
464 h->smallVnodeIndex = dh->smallVnodeIndex_lo;
465 h->largeVnodeIndex = dh->largeVnodeIndex_lo;
466 h->linkTable = dh->linkTable_lo;
/* WriteVolumeHeader_r: writes the in-core image V_disk(vp) at offset 0 of the
 * open file fdP (presumably opened from V_diskDataHandle(vp) on a line not
 * visible here).  A failed seek, or a write that does not transfer exactly
 * sizeof(V_disk(vp)) bytes, leads to FDH_REALLYCLOSE(fdP); what is stored in
 * *ec on those paths is not visible in this listing. */
471 void WriteVolumeHeader_r(ec, vp)
475 IHandle_t *h = V_diskDataHandle(vp);
485 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
487 FDH_REALLYCLOSE(fdP);
490 if (FDH_WRITE(fdP, (char*)&V_disk(vp), sizeof(V_disk(vp)))
491 != sizeof(V_disk(vp))) {
493 FDH_REALLYCLOSE(fdP);
499 /* Attach an existing volume, given its pathname, and return a
500 pointer to the volume header information. The volume also
501 normally goes online at this time. An offline volume
502 must be reattached to make it go online */
/* Wrapper: forwards ec, partition, name and mode unchanged to
 * VAttachVolumeByName_r() and keeps the result in retVal.  Any locking done
 * around the call is not visible in this listing. */
504 VAttachVolumeByName(ec, partition, name, mode)
513 retVal = VAttachVolumeByName_r(ec, partition, name, mode);
/* VAttachVolumeByName_r: attaches the volume whose header file is `name` on
 * `partition`.  Several lines are missing from this listing; the visible
 * steps are:
 *  1. A volumeUtility locks the partition with VLockPartition_r() and
 *     unlocks it with VUnlockPartition_r() at the end.
 *  2. The file server first looks the volume up with
 *     VGetVolume_r(ec, VolumeNumber(name)); the visible code tests
 *     specialStatus == VBUSY and calls VDetachVolume_r() on it.
 *  3. The partition is resolved with VGetPartition_r(), `path` is built
 *     starting from VPartitionPath(partp), and a VolumeDiskHeader is read
 *     from that file.  A short read, a magic other than VOLUMEHEADERMAGIC or
 *     a version other than VOLUMEHEADERVERSION is logged.
 *  4. DiskToVolumeHeader() converts the disk header into iheader.
 *  5. A volumeUtility whose mode is not V_SECRETLY asks the file server for
 *     the volume through FSYNC_askfs(..., FSYNC_NEEDVOLUME, mode); on the
 *     logged denial path *ec becomes VNOVOL.
 *  6. attach2() performs the attach.  For a volumeUtility that got a volume,
 *     needsPutBack is computed; when no volume came back (and mode is not
 *     V_SECRETLY) the server is told FSYNC_ON.
 *  7. For the file server with a volume: V_needsCallback is cleared,
 *     callbacks may be broken through V_BreakVolumeCallbacks, the header is
 *     rewritten with VUpdateVolume_r(), and a writeable volume whose
 *     dontSalvage flag is 0 is put on the update list.
 * NOTE(review): strcpy() into `path` is unbounded; the size of `path` is not
 * visible here -- confirm it can hold VPartitionPath() plus the name.
 */
520 VAttachVolumeByName_r(ec, partition, name, mode)
529 struct VolumeDiskHeader diskHeader;
530 struct VolumeHeader iheader;
531 struct DiskPartition *partp;
535 if (programType == volumeUtility) {
537 VLockPartition_r(partition);
539 if (programType == fileServer) {
540 vp = VGetVolume_r(ec, VolumeNumber(name));
544 if (vp->specialStatus == VBUSY)
546 VDetachVolume_r(ec, vp);
550 if (!(partp = VGetPartition_r(partition, 0))) {
556 strcpy(path, VPartitionPath(partp));
560 if ((fd = open(path, O_RDONLY)) == -1 || fstat(fd,&status) == -1) {
566 n = read(fd, &diskHeader, sizeof (diskHeader));
569 if (n != sizeof (diskHeader) || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
570 Log("VAttachVolume: Error reading volume header %s\n", path);
574 if (diskHeader.stamp.version != VOLUMEHEADERVERSION) {
575 Log("VAttachVolume: Volume %s, version number is incorrect; volume needs salvaged\n",path);
580 DiskToVolumeHeader(&iheader, &diskHeader);
581 if (programType == volumeUtility && mode != V_SECRETLY) {
582 if (FSYNC_askfs(iheader.id, partition, FSYNC_NEEDVOLUME, mode)
584 Log("VAttachVolume: attach of volume %u apparently denied by file server\n",
586 *ec = VNOVOL; /* XXXX */
591 vp = attach2(ec, path, &iheader, partp, isbusy);
592 if (programType == volumeUtility && vp) {
593 /* duplicate computation in fssync.c about whether the server
594 * takes the volume offline or not. If the volume isn't
595 * offline, we must not return it when we detach the volume,
596 * or the server will abort */
597 if (mode == V_READONLY || (!VolumeWriteable(vp) && (mode==V_CLONE || mode==V_DUMP)))
598 vp->needsPutBack = 0;
600 vp->needsPutBack = 1;
602 /* OK, there's a problem here, but one that I don't know how to
603 * fix right now, and that I don't think should arise often.
604 * Basically, we should only put back this volume to the server if
605 * it was given to us by the server, but since we don't have a vp,
606 * we can't run the VolumeWriteable function to find out as we do
607 * above when computing vp->needsPutBack. So we send it back, but
608 * there's a path in VAttachVolume on the server which may abort
609 * if this volume doesn't have a header. Should be pretty rare
610 * for all of that to happen, but if it does, probably the right
611 * fix is for the server to allow the return of readonly volumes
612 * that it doesn't think are really checked out. */
613 if (programType == volumeUtility && vp == NULL && mode != V_SECRETLY) {
614 FSYNC_askfs(iheader.id, partition, FSYNC_ON, 0);
616 else if (programType == fileServer && vp) {
617 V_needsCallback(vp) = 0;
619 if (VInit >= 2 && V_BreakVolumeCallbacks) {
620 Log("VAttachVolume: Volume %u was changed externally; breaking callbacks\n", V_id(vp));
621 (*V_BreakVolumeCallbacks)(V_id(vp));
624 VUpdateVolume_r(ec,vp);
630 if (VolumeWriteable(vp) && V_dontSalvage(vp) == 0) {
631 /* This is a hack: by temporarily setting the incore
632 * dontSalvage flag ON, the volume will be put back on the
633 * Update list (with dontSalvage OFF again). It will then
634 * come back in N minutes with DONT_SALVAGE eventually
635 * set. This is the way that volumes that have never had
636 * it set get it set; or that volumes that have been
637 * offline without DONT SALVAGE having been set also
638 * eventually get it set */
639 V_dontSalvage(vp) = DONT_SALVAGE;
640 VAddToVolumeUpdateList_r(ec,vp);
648 Log("VOnline: volume %u (%s) attached and online\n",
649 V_id(vp), V_name(vp));
652 if (programType == volumeUtility) {
653 VUnlockPartition_r(partition);
/* attach2: builds the in-core Volume for an already parsed volume header.
 * Several lines are missing from this listing; the visible steps are:
 *  - allocate a zeroed Volume, set specialStatus to VBUSY when isbusy is
 *    nonzero (0 otherwise), and record the partition and its device;
 *  - IH_INIT the large/small vnode index, disk data and link handles from
 *    partp->device and header->parent;
 *  - stamp the volume with the incremented VolumeCacheCheck and clear the
 *    shuttingDown / goingOffline flags;
 *  - ReadHeader() the volume info, the small index, the large index and the
 *    link table, each with its own magic/version pair;
 *  - log when the attach failed, when the salvage flag is on, when a
 *    writeable volume was still marked in use (file server only; the
 *    salvage flag is set and written back first), or when the volume is
 *    marked DESTROY_ME;
 *  - enter the volume in the hash table under V_id(vp), seed
 *    nextVnodeUnique from V_uniquifier(vp) and reset both bitmaps to NULL;
 *  - on the file server, clear specialStatus, and for a blessed, in-service
 *    volume that does not need salvage empty the offline message.
 * NOTE(review): no NULL check of the calloc() result is visible here --
 * confirm whether one exists on a line missing from this listing.
 */
661 private Volume *attach2(ec, path, header, partp, isbusy)
664 register struct VolumeHeader *header;
665 struct DiskPartition *partp;
671 vp = (Volume *) calloc(1, sizeof(Volume));
673 vp->specialStatus = (isbusy ? VBUSY : 0);
674 vp->device = partp->device;
675 vp->partition = partp;
676 IH_INIT(vp->vnodeIndex[vLarge].handle, partp->device, header->parent,
677 header->largeVnodeIndex);
678 IH_INIT(vp->vnodeIndex[vSmall].handle, partp->device, header->parent,
679 header->smallVnodeIndex);
680 IH_INIT(vp->diskDataHandle, partp->device, header->parent,
682 IH_INIT(vp->linkHandle, partp->device, header->parent,
684 vp->cacheCheck = ++VolumeCacheCheck;
685 vp->shuttingDown = 0;
686 vp->goingOffline = 0;
691 (void) ReadHeader(ec, V_diskDataHandle(vp),
692 (char *)&V_disk(vp), sizeof(V_disk(vp)),
693 VOLUMEINFOMAGIC, VOLUMEINFOVERSION);
696 struct IndexFileHeader iHead;
698 #if TRANSARC_VOL_STATS
700 * We just read in the diskstuff part of the header. If the detailed
701 * volume stats area has not yet been initialized, we should bzero the
702 * area and mark it as initialized.
704 if (! (V_stat_initialized(vp))) {
705 bzero((char *)(V_stat_area(vp)), VOL_STATS_BYTES);
706 V_stat_initialized(vp) = 1;
708 #endif /* TRANSARC_VOL_STATS */
710 (void) ReadHeader(ec, vp->vnodeIndex[vSmall].handle,
711 (char *)&iHead, sizeof(iHead),
712 SMALLINDEXMAGIC, SMALLINDEXVERSION);
716 struct IndexFileHeader iHead;
718 (void) ReadHeader(ec, vp->vnodeIndex[vLarge].handle,
719 (char *)&iHead, sizeof(iHead),
720 LARGEINDEXMAGIC, LARGEINDEXVERSION);
725 struct versionStamp stamp;
727 (void) ReadHeader(ec, V_linkHandle(vp),
728 (char *)&stamp, sizeof(stamp),
729 LINKTABLEMAGIC, LINKTABLEVERSION);
734 Log("VAttachVolume: Error attaching volume %s; volume needs salvage\n",
739 if (V_needsSalvaged(vp)) {
740 if (vp->specialStatus) vp->specialStatus = 0;
741 Log("VAttachVolume: volume salvage flag is ON for %s; volume needs salvage\n", path);
745 if (programType == fileServer) {
746 if (V_inUse(vp) && VolumeWriteable(vp)) {
747 if (!V_needsSalvaged(vp)) {
748 V_needsSalvaged(vp) = 1;
749 VUpdateVolume_r(ec,vp);
752 Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path);
756 if (V_destroyMe(vp) == DESTROY_ME) {
758 Log("VAttachVolume: volume %s is junk; it should be destroyed at next salvage\n", path);
764 AddVolumeToHashTable(vp, V_id(vp));
765 vp->nextVnodeUnique = V_uniquifier(vp);
766 vp->vnodeIndex[vSmall].bitmap = vp->vnodeIndex[vLarge].bitmap = NULL;
767 if (programType == fileServer && VolumeWriteable(vp)) {
769 for (i = 0; i<nVNODECLASSES; i++) {
780 if (programType == fileServer) {
781 if (vp->specialStatus) vp->specialStatus = 0;
782 if (V_blessed(vp) && V_inService(vp) && !V_needsSalvaged(vp)) {
784 V_offlineMessage(vp)[0] = '\0';
791 /* Attach an existing volume.
792 The volume also normally goes online at this time.
793 An offline volume must be reattached to make it go online.
/* Wrapper: forwards ec, volumeId and mode unchanged to VAttachVolume_r() and
 * keeps the result in retVal.  Any locking done around the call is not
 * visible in this listing. */
797 VAttachVolume(ec,volumeId, mode)
805 retVal = VAttachVolume_r(ec, volumeId, mode);
/* VAttachVolume_r: attach a volume by id.  GetVolumePath() translates the id
 * into a partition and header-file name; the volume is looked up with
 * VGetVolume_r(), asserted not to be in use, and detached (the conditions
 * guarding those two steps are not visible here); finally the attach itself
 * is delegated to VAttachVolumeByName_r(). */
812 VAttachVolume_r(ec,volumeId, mode)
818 GetVolumePath(ec,volumeId, &part, &name);
822 vp = VGetVolume_r(&error, volumeId);
824 assert(V_inUse(vp) == 0);
825 VDetachVolume_r(ec, vp);
829 return VAttachVolumeByName_r(ec, part, name, mode);
832 /* Increment a reference count to a volume, sans context swaps. Requires
833 * possibly reading the volume header in from the disk, since there's
834 * an invariant in the volume package that nUsers>0 ==> vp->header is valid.
836 * N.B. This call can fail if we can't read in the header!! In this case
837 * we still guarantee we won't context swap, but the ref count won't be
838 * incremented (otherwise we'd violate the invariant).
/* VHold_r: when the volume has no users and GetVolumeHeader() returns 0, the
 * header contents are re-read from disk with ReadHeader() and the reload is
 * counted in VolumeReplacements.  A ReadHeader() error is returned to the
 * caller; the success path is not visible in this listing. */
840 static int VHold_r(register Volume *vp)
844 if (vp->nUsers == 0 && !GetVolumeHeader(vp)) {
845 VolumeReplacements++;
846 ReadHeader(&error, V_diskDataHandle(vp),
847 (char *)&V_disk(vp), sizeof(V_disk(vp)),
848 VOLUMEINFOMAGIC, VOLUMEINFOVERSION);
849 if (error) return error;
/* Wrapper: calls VHold_r(vp) and keeps the result in retVal.  Any locking
 * done around the call is not visible in this listing. */
855 static int VHold(register Volume *vp)
859 retVal = VHold_r(vp);
/* VTakeOffline_r: marks a held volume as going offline and as needing
 * salvage.  The caller must hold a reference (nUsers > 0) and the program
 * must be the file server; both conditions are asserted. */
864 void VTakeOffline_r(register Volume *vp)
866 assert(vp->nUsers > 0);
867 assert(programType == fileServer);
868 vp->goingOffline = 1;
869 V_needsSalvaged(vp) = 1;
872 void VTakeOffline(register Volume *vp)
/* VPutVolume_r: drops one reference to vp.
 * When nUsers reaches 0 the volume header is handed to ReleaseVolumeHeader().
 * If the volume was going offline (file server only, asserted) the flag is
 * cleared, the header is written with VUpdateVolume_r(), the handles are
 * closed with VCloseVolumeHandles_r(), the event is logged together with any
 * offline message, and waiters are woken (condition broadcast under
 * AFS_PTHREAD_ENV, LWP_NoYieldSignal otherwise).  If the volume is shutting
 * down its handles are released and, on the file server, waiters are woken
 * the same way.
 * NOTE(review): the decrement of nUsers happens inside assert(); confirm
 * that this assert is never compiled out.
 */
879 void VPutVolume_r(register Volume *vp)
881 assert(--vp->nUsers >= 0);
882 if (vp->nUsers == 0) {
883 ReleaseVolumeHeader(vp->header);
884 if (vp->goingOffline) {
886 assert(programType == fileServer);
887 vp->goingOffline = 0;
889 VUpdateVolume_r(&error, vp);
890 VCloseVolumeHandles_r(vp);
892 Log("VOffline: Volume %u (%s) is now offline",
893 V_id(vp), V_name(vp));
894 if (V_offlineMessage(vp)[0])
895 Log(" (%s)", V_offlineMessage(vp));
898 #ifdef AFS_PTHREAD_ENV
899 assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
900 #else /* AFS_PTHREAD_ENV */
901 LWP_NoYieldSignal(VPutVolume);
902 #endif /* AFS_PTHREAD_ENV */
904 if (vp->shuttingDown) {
905 VReleaseVolumeHandles_r(vp);
907 if (programType == fileServer)
908 #ifdef AFS_PTHREAD_ENV
909 assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
910 #else /* AFS_PTHREAD_ENV */
911 LWP_NoYieldSignal(VPutVolume);
912 #endif /* AFS_PTHREAD_ENV */
917 void VPutVolume(register Volume *vp)
924 /* Get a pointer to an attached volume. The pointer is returned regardless
925 of whether or not the volume is in service or on/off line. An error
926 code, however, is returned with an indication of the volume's status */
/* Wrapper: calls VGetVolume_r(ec, volumeId) and keeps the result in retVal.
 * Any locking done around the call is not visible in this listing. */
927 Volume *VGetVolume(ec,volumeId)
933 retVal = VGetVolume_r(ec,volumeId);
/* VGetVolume_r: looks volumeId up in its hash chain and, when no error
 * occurred, returns the volume with nUsers incremented.
 * Several lines are missing from this listing; the visible steps are:
 *  - walk VolumeHashTable[VOLUME_HASH(volumeId)] until hashid matches;
 *  - if the volume has no users and GetVolumeHeader() returns 0, re-read the
 *    header with ReadHeader(); a failure is logged unless errno is ENXIO and
 *    LogLevel is 0;
 *  - test shuttingDown;
 *  - on the file server, wait while the volume is going offline
 *    (pthread_cond_wait on vol_put_volume_cond, or LWP_WaitProcess), report
 *    specialStatus through *ec, and test V_inService / V_blessed / V_inUse.
 * The purpose of the V0..V15 locals is not visible in this listing.
 */
938 Volume *VGetVolume_r(ec,volumeId)
943 unsigned short V0=0, V1=0, V2=0, V3=0, V4=0, V5=0, V6=0, V7=0, V8=0, V9=0;
944 unsigned short V10=0, V11=0, V12=0, V13=0, V14=0, V15=0;
949 for (vp = VolumeHashTable[VOLUME_HASH(volumeId)];
950 vp && vp->hashid != volumeId; vp = vp->hashNext)
957 /* Until we have reached an initialization level of 2
958 we don't know whether this volume exists or not.
959 We can't sleep and retry later because before a volume
960 is attached, the caller tries to get it first. Just
961 return VOFFLINE and the caller can choose whether to
962 retry the command or not.*/
973 if (vp->nUsers == 0 && !GetVolumeHeader(vp)) {
975 VolumeReplacements++;
976 ReadHeader(ec, V_diskDataHandle(vp),
977 (char *)&V_disk(vp), sizeof(V_disk(vp)), VOLUMEINFOMAGIC,
981 /* Only log the error if it was a totally unexpected error. Simply
982 a missing inode is likely to be caused by the volume being deleted */
983 if (errno != ENXIO || LogLevel)
984 Log("Volume %u: couldn't reread volume header\n", vp->hashid);
991 if (vp->shuttingDown) {
997 if (programType == fileServer) {
999 if (vp->goingOffline) {
1001 #ifdef AFS_PTHREAD_ENV
1002 pthread_cond_wait(&vol_put_volume_cond, &vol_glock_mutex);
1003 #else /* AFS_PTHREAD_ENV */
1004 LWP_WaitProcess(VPutVolume);
1005 #endif /* AFS_PTHREAD_ENV */
1008 if (vp->specialStatus) {
1010 *ec = vp->specialStatus;
1012 else if (V_inService(vp)==0 || V_blessed(vp)==0) {
1016 else if (V_inUse(vp)==0) {
1027 /* if no error, bump nUsers */
1028 if (vp) vp->nUsers++;
1035 /* For both VForceOffline and VOffline, we close all relevant handles.
1036 * For VOffline, if we re-attach the volume, the files may possibly be
1037 * different than before.
/* VReleaseVolumeHandles_r: flushes the volume with DFlushVolume(), releases
 * its vnode files, conditionally syncs the index, disk data and link handles
 * (skipped for a volumeUtility), and finally IH_RELEASEs all four handles. */
1039 static void VReleaseVolumeHandles_r(Volume *vp)
1041 DFlushVolume(V_id(vp));
1042 VReleaseVnodeFiles_r(vp);
1044 /* Too time consuming and unnecessary for the volserver */
1045 if (programType != volumeUtility) {
1046 IH_CONDSYNC(vp->vnodeIndex[vLarge].handle);
1047 IH_CONDSYNC(vp->vnodeIndex[vSmall].handle);
1048 IH_CONDSYNC(vp->diskDataHandle);
1050 IH_CONDSYNC(vp->linkHandle);
1051 #endif /* AFS_NT40_ENV */
1054 IH_RELEASE(vp->vnodeIndex[vLarge].handle);
1055 IH_RELEASE(vp->vnodeIndex[vSmall].handle);
1056 IH_RELEASE(vp->diskDataHandle);
1057 IH_RELEASE(vp->linkHandle);
1060 /* Force the volume offline, set the salvage flag. No further references to
1061 * the volume through the volume package will be honored. */
/* VForceOffline_r: stores a fixed "forced offline" text as the offline
 * message, logs the event, clears goingOffline, sets the needs-salvage flag
 * and writes the header with VUpdateVolume_r().  Waiters are then woken
 * (condition broadcast under AFS_PTHREAD_ENV, LWP_NoYieldSignal otherwise)
 * and the volume's handles are released. */
1062 void VForceOffline_r(Volume *vp)
1067 strcpy(V_offlineMessage(vp), "Forced offline due to internal error: volume needs to be salvaged");
1068 Log("Volume %u forced offline: it needs salvaging!\n", V_id(vp));
1070 vp->goingOffline = 0;
1071 V_needsSalvaged(vp) = 1;
1072 VUpdateVolume_r(&error, vp);
1073 #ifdef AFS_PTHREAD_ENV
1074 assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
1075 #else /* AFS_PTHREAD_ENV */
1076 LWP_NoYieldSignal(VPutVolume);
1077 #endif /* AFS_PTHREAD_ENV */
1079 VReleaseVolumeHandles_r(vp);
/* Wrapper: calls VForceOffline_r(vp).  Any locking done around the call is
 * not visible in this listing. */
1083 void VForceOffline(Volume *vp)
1086 VForceOffline_r(vp);
1090 /* The opposite of VAttachVolume. The volume header is written to disk, with
1091 the inUse bit turned off. A copy of the header is maintained in memory,
1092 however (which is why this is VOffline, not VDetach).
/* VOffline_r: not for volume utilities (asserted).  When no offline message
 * is set yet, `message` is copied in with strncpy(); the last byte of the
 * message buffer is then forced to NUL because strncpy() does not terminate
 * a truncated copy.  goingOffline is set, and the volume is fetched again by
 * id with VGetVolume_r().  The statements between those steps, and the one
 * guarded by the final `if (vp)`, are not visible in this listing. */
1094 void VOffline_r(Volume *vp, char *message)
1097 VolumeId vid = V_id(vp);
1098 assert(programType != volumeUtility);
1103 if (V_offlineMessage(vp)[0] == '\0')
1104 strncpy(V_offlineMessage(vp),message,
1105 sizeof(V_offlineMessage(vp)));
1106 V_offlineMessage(vp)[sizeof(V_offlineMessage(vp))-1] = '\0';
1107 vp->goingOffline = 1;
1109 vp = VGetVolume_r(&error, vid); /* Wait for it to go offline */
1110 if (vp) /* In case it was reattached... */
/* Wrapper: calls VOffline_r(vp, message).  Any locking done around the call
 * is not visible in this listing. */
1114 void VOffline(Volume *vp, char *message)
1117 VOffline_r(vp, message);
1121 /* For VDetachVolume, we close all cached file descriptors, but keep
1122 * the Inode handles in case we need to read from a busy volume.
/* VCloseVolumeHandles_r: flushes the volume with DFlushVolume(), closes its
 * vnode files, conditionally syncs the index, disk data and link handles
 * (skipped for a volumeUtility), and then IH_REALLYCLOSEs all four handles.
 * Unlike VReleaseVolumeHandles_r it calls IH_REALLYCLOSE rather than
 * IH_RELEASE. */
1124 static void VCloseVolumeHandles_r(Volume *vp)
1126 DFlushVolume(V_id(vp));
1127 VCloseVnodeFiles_r(vp);
1129 /* Too time consuming and unnecessary for the volserver */
1130 if (programType != volumeUtility) {
1131 IH_CONDSYNC(vp->vnodeIndex[vLarge].handle);
1132 IH_CONDSYNC(vp->vnodeIndex[vSmall].handle);
1133 IH_CONDSYNC(vp->diskDataHandle);
1135 IH_CONDSYNC(vp->linkHandle);
1136 #endif /* AFS_NT40_ENV */
1139 IH_REALLYCLOSE(vp->vnodeIndex[vLarge].handle);
1140 IH_REALLYCLOSE(vp->vnodeIndex[vSmall].handle);
1141 IH_REALLYCLOSE(vp->diskDataHandle);
1142 IH_REALLYCLOSE(vp->linkHandle);
1145 /* This gets used for the most part by utility routines that don't want
1146 * to keep all the volume headers around. Generally, the file server won't
1147 * call this routine, because then the offline message in the volume header
1148 * (or other information) will still be available to clients. For NAMEI, also
1149 * close the file handles.
/* VDetachVolume_r: removes vp from the hash table and marks it as shutting
 * down; *ec is always set to 0.
 * For a volumeUtility, notifyServer is taken from vp->needsPutBack and
 * useDone records whether the volume is marked DESTROY_ME.  When the server
 * has to be notified it is sent FSYNC_DONE or FSYNC_ON for the volume; the
 * test that picks between the two is not visible here (presumably useDone).
 * Callbacks are broken through V_BreakVolumeCallbacks when that hook is set.
 * notifyServer and useDone are assigned only in the volumeUtility branch;
 * the later test checks programType first, so they are not read otherwise.
 */
1151 void VDetachVolume_r(Error *ec, Volume *vp)
1154 struct DiskPartition *tpartp;
1155 int notifyServer, useDone;
1157 *ec = 0; /* always "succeeds" */
1158 if (programType == volumeUtility) {
1159 notifyServer = vp->needsPutBack;
1160 useDone = (V_destroyMe(vp) == DESTROY_ME);
1162 tpartp = vp->partition;
1164 DeleteVolumeFromHashTable(vp);
1165 vp->shuttingDown = 1;
1167 /* Will be detached sometime in the future--this is OK since volume is offline */
1169 if (programType == volumeUtility && notifyServer) {
1170 /* Note: The server is not notified in the case of a bogus volume explicitly to
1171 make it possible to create a volume, do a partial restore, then abort the
1172 operation without ever putting the volume online. This is essential in the
1173 case of a volume move operation between two partitions on the same server. In
1174 that case, there would be two instances of the same volume, one of them bogus,
1175 which the file server would attempt to put on line */
1177 FSYNC_askfs(volume, tpartp->name, FSYNC_DONE, 0); /* don't put online */
1179 FSYNC_askfs(volume, tpartp->name, FSYNC_ON, 0); /* fs can use it again */
1180 /* Detaching it, so break all callbacks on it */
1181 if (V_BreakVolumeCallbacks) {
1182 Log("volume %u detached; breaking all call backs\n", volume);
1183 (*V_BreakVolumeCallbacks)(volume);
/* Wrapper: calls VDetachVolume_r(ec, vp).  Any locking done around the call
 * is not visible in this listing. */
1189 void VDetachVolume(Error *ec, Volume *vp)
1192 VDetachVolume_r(ec, vp);
/* VAllocBitmapEntry_r: finds a free bit in index->bitmap and returns its bit
 * number.
 * The scan starts at bitmapOffset and advances one bit32 word (4 bytes) at a
 * time; a word that is not 0xffffffff contains a free bit.  bitmapOffset is
 * moved to that word and ffs(~*bp)-1 gives the free bit inside a byte (the
 * byte-level loop and the statement that sets the bit are not visible in
 * this listing).  When every word is full the bitmap is grown by
 * VOLUME_BITMAP_GROWSIZE zeroed bytes and the first bit of the new area is
 * returned.
 * NOTE(review): *(bit32 *)bp reads through a byte pointer; this presumes
 * bitmapOffset is kept 4-byte aligned -- confirm.  The variable receiving
 * the realloc() result is on a line missing from this listing.
 */
1197 int VAllocBitmapEntry_r(ec,vp,index)
1200 register struct vnodeIndex *index;
1202 register byte *bp,*ep;
1204 /* This test is probably redundant */
1205 if (!VolumeWriteable(vp)) {
1209 bp = index->bitmap + index->bitmapOffset;
1210 ep = index->bitmap + index->bitmapSize;
1212 if ((*(bit32 *)bp) != 0xffffffff) {
1214 index->bitmapOffset = bp - index->bitmap;
1217 o = ffs(~*bp)-1; /* ffs is documented in BSTRING(3) */
1219 return (bp - index->bitmap)*8 + o;
1221 bp += sizeof(bit32) /* i.e. 4 */;
1223 /* No bit map entry--must grow bitmap */
1225 realloc(index->bitmap, index->bitmapSize+VOLUME_BITMAP_GROWSIZE);
1228 bp += index->bitmapSize;
1229 bzero(bp, VOLUME_BITMAP_GROWSIZE);
1230 index->bitmapOffset = index->bitmapSize;
1231 index->bitmapSize += VOLUME_BITMAP_GROWSIZE;
1233 return index->bitmapOffset*8;
/* Wrapper: calls VAllocBitmapEntry_r(ec, vp, index) and keeps the result in
 * retVal.  Any locking done around the call is not visible in this listing. */
1236 int VAllocBitmapEntry(ec,vp,index)
1239 register struct vnodeIndex *index;
1243 retVal = VAllocBitmapEntry_r(ec,vp,index);
/* VFreeBitMapEntry_r: clears bit `bitNumber` in index->bitmap.
 * offset is the byte that holds the bit (bitNumber / 8).  An offset at or
 * beyond bitmapSize takes an error path whose body is not visible here.
 * When the freed byte lies before bitmapOffset, bitmapOffset is pulled back
 * to the start of the 4-byte word containing it. */
1248 void VFreeBitMapEntry_r(Error *ec, register struct vnodeIndex *index,
1251 unsigned int offset;
1253 offset = bitNumber>>3;
1254 if (offset >= index->bitmapSize) {
1258 if (offset < index->bitmapOffset)
1259 index->bitmapOffset = offset&~3; /* Truncate to nearest bit32 */
1260 *(index->bitmap + offset) &= ~(1 << (bitNumber & 0x7));
/* Wrapper: calls VFreeBitMapEntry_r(ec, index, bitNumber).  Any locking done
 * around the call is not visible in this listing. */
1263 void VFreeBitMapEntry(Error *ec, register struct vnodeIndex *index,
1267 VFreeBitMapEntry_r(ec, index, bitNumber);
/* VUpdateVolume_r: writes the volume header with WriteVolumeHeader_r().
 * On the file server the stored uniquifier is first set to
 * V_nextVnodeUnique(vp), plus 200 while the volume is in use.  The visible
 * error path logs the failure and forces the volume offline; the condition
 * that selects it is not visible in this listing. */
1271 void VUpdateVolume_r(Error *ec,Volume *vp)
1274 if (programType == fileServer)
1275 V_uniquifier(vp) = (V_inUse(vp)? V_nextVnodeUnique(vp) + 200: V_nextVnodeUnique(vp));
1276 /*printf("Writing volume header for '%s'\n", V_name(vp));*/
1277 WriteVolumeHeader_r(ec, vp);
1280 "VUpdateVolume: error updating volume header, volume %u (%s)\n",
1281 V_id(vp), V_name(vp));
1282 VForceOffline_r(vp);
/* Wrapper: calls VUpdateVolume_r(ec, vp).  Any locking done around the call
 * is not visible in this listing. */
1286 void VUpdateVolume(Error *ec, Volume *vp)
1289 VUpdateVolume_r(ec, vp);
/* VSyncVolume_r: writes the header with VUpdateVolume_r(), then opens the
 * disk data handle (the open is asserted to succeed) and syncs it with
 * FDH_SYNC().  What is done with `code` is not visible in this listing. */
1293 void VSyncVolume_r(Error *ec, Volume *vp)
1296 VUpdateVolume_r(ec, vp);
1299 fdP = IH_OPEN(V_diskDataHandle(vp));
1300 assert(fdP != NULL);
1301 code = FDH_SYNC(fdP);
/* Wrapper: calls VSyncVolume_r(ec, vp).  Any locking done around the call is
 * not visible in this listing. */
1307 void VSyncVolume(Error *ec, Volume *vp)
1310 VSyncVolume_r(ec, vp);
/* FreeVolume: frees the bitmap of every vnode class that has one, releases
 * the volume header with FreeVolumeHeader() and removes the volume from the
 * hash table.  Whether vp itself is freed is not visible in this listing. */
1314 static void FreeVolume(vp)
1320 for (i = 0; i<nVNODECLASSES; i++)
1321 if (vp->vnodeIndex[i].bitmap)
1322 free(vp->vnodeIndex[i].bitmap);
1323 FreeVolumeHeader(vp);
1324 DeleteVolumeFromHashTable(vp);
/* GetBitmap: builds the allocation bitmap of one vnode class by reading the
 * class's vnode index file.
 * Several lines are missing from this listing; the visible steps are:
 *  - open the index handle and wrap it in a stream (both asserted non-NULL),
 *    and allocate one vnode-sized read buffer (asserted non-NULL);
 *  - derive nVnodes from the file size minus one vcp->diskSize header slot
 *    (the divisor is on a line not visible here);
 *  - size the bitmap as ((nVnodes/8)+10)/4*4 bytes -- integer division, so
 *    the result is truncated to a multiple of 4 -- and allocate it zeroed;
 *  - seek past the first vcp->diskSize bytes and read vnodes one at a time;
 *    every vnode whose type is not vNull must carry vcp->magic (otherwise
 *    the index is logged as addled), gets its bit set, and raises `unique`
 *    to its uniquifier + 1 when that is larger;
 *  - log when vp->nextVnodeUnique is smaller than the largest value found.
 */
1328 static void GetBitmap(Error *ec, Volume *vp, VnodeClass class)
1330 StreamHandle_t *file;
1334 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
1335 struct vnodeIndex *vip = &vp->vnodeIndex[class];
1336 struct VnodeDiskObject *vnode;
1337 unsigned int unique = 0;
1342 fdP = IH_OPEN(vip->handle);
1343 assert (fdP != NULL);
1344 file = FDH_FDOPEN(fdP, "r");
1345 assert (file != NULL);
1346 vnode = (VnodeDiskObject *) malloc(vcp->diskSize);
1347 assert(vnode != NULL);
1348 size = OS_SIZE(fdP->fd_fd);
1350 nVnodes = (size <= vcp->diskSize? 0: size-vcp->diskSize)
1352 vip->bitmapSize = ((nVnodes/8)+10)/4*4; /* The 10 is a little extra so
1353 a few files can be created in this volume,
1354 the whole thing is rounded up to nearest 4
1355 bytes, because the bit map allocator likes
1357 vip->bitmap = (byte *) calloc(1, vip->bitmapSize);
1358 assert(vip->bitmap != NULL);
1359 vip->bitmapOffset = 0;
1360 if (STREAM_SEEK(file,vcp->diskSize,0) != -1) {
1362 for (bitNumber = 0; bitNumber < nVnodes+100; bitNumber++) {
1363 if (STREAM_READ(vnode, vcp->diskSize, 1, file) != 1)
1365 if (vnode->type != vNull) {
1366 if (vnode->vnodeMagic != vcp->magic) {
1367 Log("GetBitmap: addled vnode index in volume %s; volume needs salvage\n",
1372 *(vip->bitmap + (bitNumber>>3)) |= (1 << (bitNumber & 0x7));
1373 if (unique <= vnode->uniquifier)
1374 unique = vnode->uniquifier + 1;
1376 #ifndef AFS_PTHREAD_ENV
1377 if ((bitNumber & 0x00ff) == 0x0ff) { /* every 256 iterations */
1380 #endif /* !AFS_PTHREAD_ENV */
1383 if (vp->nextVnodeUnique < unique) {
1384 Log("GetBitmap: bad volume uniquifier for volume %s; volume needs salvage\n", V_name(vp));
1387 /* Paranoia, partly justified--I think fclose after fdopen
1388 * doesn't seem to close fd. In any event, the documentation
1389 * doesn't specify, so it's safer to close it twice.
/* GetVolumePath: finds the partition that holds the header file of volumeId.
 * The header-file name is formatted with VFORMAT into name[] starting at
 * index 1 (what is stored at index 0 is not visible here).  Every entry of
 * DiskPartitionList is probed with stat() on a path that starts with
 * VPartitionPath(dp); the first hit has its partition name copied out.
 * *partitionp and *namep are set to NULL on one path and *partitionp to the
 * partition buffer on another; the condition separating them is not visible.
 * `partition` and `name` are static buffers, so returned pointers are
 * overwritten by the next call.
 */
1396 static void GetVolumePath(Error *ec, VolId volumeId, char **partitionp,
1399 static char partition[VMAXPATHLEN], name[VMAXPATHLEN];
1400 char path[VMAXPATHLEN];
1402 struct DiskPartition *dp;
1406 sprintf(&name[1],VFORMAT,volumeId);
1407 for (dp = DiskPartitionList; dp; dp = dp->next) {
1409 strcpy(path, VPartitionPath(dp));
1411 if (stat(path,&status) == 0) {
1412 strcpy(partition, dp->name);
1419 *partitionp = *namep = NULL;
1422 *partitionp = partition;
1432 return atoi(name+1);
/* VolumeExternalName: formats volumeId with VFORMAT into a static buffer,
 * so the text is overwritten by the next call.
 * NOTE(review): the buffer holds 15 bytes and sprintf() is unbounded;
 * confirm that the longest VFORMAT expansion plus its terminating NUL fits. */
1435 char *VolumeExternalName(volumeId)
1438 static char name[15];
1439 sprintf(name,VFORMAT,volumeId);
1443 #if TRANSARC_VOL_STATS
1444 #define OneDay (86400) /* 24 hours' worth of seconds */
1446 #define OneDay (24*60*60) /* 24 hours */
1447 #endif /* TRANSARC_VOL_STATS */
1449 #define Midnight(date) ((date-TimeZoneCorrection)/OneDay*OneDay+TimeZoneCorrection)
1451 /*------------------------------------------------------------------------
1452 * [export] VAdjustVolumeStatistics
1455 * If we've passed midnight, we need to update all the day use
1456 * statistics as well as zeroing the detailed volume statistics
1457 * (if we are implementing them).
1460 * vp : Pointer to the volume structure describing the lucky
1461 * volume being considered for update.
1467 * Nothing interesting.
1471 *------------------------------------------------------------------------*/
1473 VAdjustVolumeStatistics_r(vp)
1474 register Volume *vp;
1476 { /*VAdjustVolumeStatistics*/
1478 unsigned int now = FT_ApproxTime();
1480 if (now - V_dayUseDate(vp) > OneDay) {
/* ndays = whole days elapsed since the statistics were last rolled over. */
1483 ndays = (now - V_dayUseDate(vp)) / OneDay;
/* Age the history by ndays slots (indices 0..6 are used here), then zero the
 * slots that stand for days with no recorded use. */
1484 for (i = 6; i>ndays-1; i--)
1485 V_weekUse(vp)[i] = V_weekUse(vp)[i-ndays];
1486 for (i = 0; i<ndays-1 && i<7; i++)
1487 V_weekUse(vp)[i] = 0;
/* NOTE(review): index ndays-1 is in range only when ndays <= 7; any guard
 * is on a line missing from this listing -- confirm. */
1489 V_weekUse(vp)[ndays-1] = V_dayUse(vp);
1491 V_dayUseDate(vp) = Midnight(now);
1493 #if TRANSARC_VOL_STATS
1495 * All we need to do is bzero the entire VOL_STATS_BYTES of
1496 * the detailed volume statistics area.
1498 bzero((char *)(V_stat_area(vp)), VOL_STATS_BYTES);
1499 #endif /* TRANSARC_VOL_STATS */
1500 } /*It's been more than a day of collection*/
1502 #if TRANSARC_VOL_STATS
1504 * Always return happily.
1507 #endif /* TRANSARC_VOL_STATS */
1509 } /*VAdjustVolumeStatistics*/
/*
 * Entry point that forwards to VAdjustVolumeStatistics_r(vp).
 * NOTE(review): presumably the volume lock is taken around the call, as the
 * _r naming suggests -- those lines are not shown here; confirm.
 */
1511 VAdjustVolumeStatistics(vp)
1512 register Volume *vp;
1516 VAdjustVolumeStatistics_r(vp);
/*
 * Count one more use of vp for the current day.  If more than OneDay seconds
 * have elapsed since V_dayUseDate(vp), the statistics are rolled over first
 * through VAdjustVolumeStatistics_r.  V_dayUse is then post-incremented, and
 * the volume is written out with VUpdateVolume_r whenever the pre-increment
 * value has its low seven bits clear, i.e. once every 128 bumps.
 */
1521 void VBumpVolumeUsage_r(register Volume *vp)
1523 unsigned int now = FT_ApproxTime();
1524 if (now - V_dayUseDate(vp) > OneDay)
1525 VAdjustVolumeStatistics_r(vp);
1527 * Save the volume header image to disk after every 128 bumps to dayUse.
1529 if ((V_dayUse(vp)++ & 127) == 0) {
1531 VUpdateVolume_r(&error, vp);
/*
 * Entry point that forwards to VBumpVolumeUsage_r(vp).
 * NOTE(review): presumably brackets the call with the volume lock -- those
 * lines are not shown here; confirm.
 */
1535 void VBumpVolumeUsage(register Volume *vp)
1538 VBumpVolumeUsage_r(vp);
/*
 * Periodic disk-usage refresh.  Calls VResetDiskUsage_r() and counts its
 * invocations in a static counter; on every third call the counter is reset
 * to zero and extra work is done (that statement is on a line not shown --
 * presumably the volume update-list scan; confirm).
 * NOTE(review): the initialization-level check described in the comment
 * below sits on lines not shown here.
 */
1542 void VSetDiskUsage_r(void)
1544 static int FifteenMinuteCounter = 0;
1547 /* NOTE: Don't attempt to access the partitions list until the
1548 initialization level indicates that all volumes are attached,
1549 which implies that all partitions are initialized. */
1550 #ifdef AFS_PTHREAD_ENV
1552 #else /* AFS_PTHREAD_ENV */
1554 #endif /* AFS_PTHREAD_ENV */
1557 VResetDiskUsage_r();
1558 if (++FifteenMinuteCounter == 3) {
1559 FifteenMinuteCounter = 0;
1564 void VSetDiskUsage(void)
1571 /* The number of seconds (10 minutes) that a volume must go without an update before the
1572 * "Dont salvage" flag in the volume header will be turned on */
1573 #define SALVAGE_INTERVAL (10*60)
1575 static VolumeId *UpdateList; /* Pointer to array of Volume ID's */
1576 static int nUpdatedVolumes; /* Number of entries in use in UpdateList (updated volumes, salvage after crash flag on) */
1577 static int updateSize; /* number of entries possible */
1578 #define UPDATE_LIST_SIZE 100 /* size increment */
/*
 * Record that vp has just been modified: stamps vp->updateTime with the
 * current approximate time, clears V_dontSalvage(vp) and syncs the volume
 * through VSyncVolume_r (errors come back in *ec), then appends V_id(vp) to
 * UpdateList.  The list is first allocated with UPDATE_LIST_SIZE slots and
 * grows by the same amount each time it fills up.
 * NOTE(review): the statement guarded by "V_dontSalvage(vp) == 0" is on a
 * line not shown (presumably an early return when the flag is already
 * clear); confirm.
 * NOTE(review): no check of the malloc/realloc results is visible, and the
 * realloc result is stored straight back into UpdateList; confirm how an
 * allocation failure is handled.
 */
1580 void VAddToVolumeUpdateList_r(Error *ec, Volume *vp)
1583 vp->updateTime = FT_ApproxTime();
1584 if (V_dontSalvage(vp) == 0)
1586 V_dontSalvage(vp) = 0;
1587 VSyncVolume_r(ec, vp);
1591 updateSize = UPDATE_LIST_SIZE;
1592 UpdateList = (VolumeId *) malloc(sizeof (VolumeId) * updateSize);
1594 if (nUpdatedVolumes == updateSize) {
1595 updateSize += UPDATE_LIST_SIZE;
1596 UpdateList = (VolumeId *) realloc(UpdateList, sizeof (VolumeId) * updateSize);
1599 UpdateList[nUpdatedVolumes++] = V_id(vp);
/*
 * Walk UpdateList and compact it in place: each ID is copied down by `gap`
 * slots and looked up with VGetVolume_r.  A volume with nUsers == 1
 * (presumably only this scan's reference) that has gone more than
 * SALVAGE_INTERVAL seconds since vp->updateTime gets V_dontSalvage set to
 * DONT_SALVAGE and is written back with VUpdateVolume_r.  At the end
 * nUpdatedVolumes is reduced by gap.
 * NOTE(review): the lines that increment gap and release vp are not shown
 * here; confirm before changing the loop.
 */
1602 static void VScanUpdateList() {
1603 register int i, gap;
1604 register Volume *vp;
1606 afs_int32 now = FT_ApproxTime();
1607 /* Be careful with this code, since it works with interleaved calls to AddToVolumeUpdateList */
1608 for (i = gap = 0; i<nUpdatedVolumes; i++) {
1609 vp = VGetVolume_r(&error, UpdateList[i-gap] = UpdateList[i]);
1612 } else if (vp->nUsers == 1 && now - vp->updateTime > SALVAGE_INTERVAL) {
1613 V_dontSalvage(vp) = DONT_SALVAGE;
1614 VUpdateVolume_r(&error, vp); /* No need to fsync--not critical */
1619 #ifndef AFS_PTHREAD_ENV
1621 #endif /* !AFS_PTHREAD_ENV */
1623 nUpdatedVolumes -= gap;
1626 /***************************************************/
1627 /* Add on routines to manage a volume header cache */
1628 /***************************************************/
1630 static struct volHeader *volumeLRU;
1632 /* Allocate a bunch of headers; string them together */
/*
 * Tests programType against fileServer first (the guarded statement is on a
 * line not shown -- presumably an early return for other program types).
 * Allocates howMany zero-filled volHeader structures with a single calloc
 * and hands them to ReleaseVolumeHeader one at a time (loop header not
 * shown).
 * NOTE(review): the calloc result is used without a visible NULL check.
 */
1633 static void InitLRU(howMany)
1636 register struct volHeader *hp;
1637 if (programType != fileServer)
1639 hp = (struct volHeader *)(calloc(howMany, sizeof(struct volHeader)));
1641 ReleaseVolumeHeader(hp++);
1644 /* Get a volume header from the LRU list; update the old one if necessary */
1645 /* Returns 1 if there was already a header, which is removed from the LRU list */
/*
 * Summary of the visible logic:
 *  - `old` records whether vp already had a header on entry.
 *  - Outside the file server a header is calloc'ed for the volume.
 *  - In the file server, a header already tied to vp is reused (asserting
 *    hd->back == vp and advancing volumeLRU past it if it was the head);
 *    otherwise the header at volumeLRU->prev is taken, or a fresh one is
 *    calloc'ed and a warning is logged (everLogged apparently limits the
 *    message to once).
 *  - If the header being taken over is marked inUse it is written back
 *    through WriteVolumeHeader_r, and its previous volume's header pointer
 *    is cleared.
 *  - hd is then unlinked from the LRU ring, its next/prev pointers are
 *    zeroed, and volumeLRU is set to null if it still points at hd.
 * NOTE(review): several branch conditions and the return statement sit on
 * lines not shown; confirm them before relying on this summary.
 */
1646 static int GetVolumeHeader(vp)
1647 register Volume *vp;
1650 register struct volHeader *hd;
1652 static int everLogged = 0;
1654 old = (vp->header != 0); /* old == volume already has a header */
1655 if (programType != fileServer) {
1657 hd = (struct volHeader *) calloc(1, sizeof(*vp->header));
1666 if (volumeLRU == hd)
1667 volumeLRU = hd->next;
1668 assert(hd->back == vp);
1672 hd = volumeLRU->prev; /* not currently in use and least recently used */
1674 hd = (struct volHeader *) calloc(1, sizeof(*vp->header));
1675 hd->prev = hd->next = hd; /* make it look like single elt LRU */
1677 Log("****Allocated more volume headers, probably leak****\n");
1682 if (hd->diskstuff.inUse) {
1683 WriteVolumeHeader_r(&error, hd->back);
1684 /* Ignore errors; catch them later */
1686 hd->back->header = 0;
1691 if (hd->next) { /* hd->next != 0 --> in LRU chain (we zero it later) */
1692 hd->prev->next = hd->next; /* pull hd out of LRU list */
1693 hd->next->prev = hd->prev; /* if hd only element, this is noop */
1695 hd->next = hd->prev = 0;
1696 /* if not in LRU chain, next test won't be true */
1697 if (hd == volumeLRU) /* last header item, turn into empty list */
1698 volumeLRU = (struct volHeader *) 0;
1703 /* Put it at the top of the LRU chain */
/*
 * Link hd into the circular volumeLRU ring.  The programType test and the
 * "no header, or already released" test each guard a statement on a line
 * not shown (presumably early returns).  One path makes hd a single-element
 * ring (presumably when volumeLRU is empty); the other splices hd in
 * between volumeLRU->prev and volumeLRU.
 * NOTE(review): the update of volumeLRU itself is not visible here; confirm.
 */
1704 static void ReleaseVolumeHeader(hd)
1705 register struct volHeader *hd;
1707 if (programType != fileServer)
1709 if (!hd || hd->next) /* no header, or header already released */
1712 hd->next = hd->prev = hd;
1714 hd->prev = volumeLRU->prev;
1715 hd->next = volumeLRU;
1716 hd->prev->next = hd->next->prev = hd;
/*
 * Give up the header attached to vp (vp->header).  In the file server the
 * header is handed back to the LRU ring through ReleaseVolumeHeader; the
 * handling for other program types and any further cleanup are on lines not
 * shown here.
 */
1721 static void FreeVolumeHeader(vp)
1722 register Volume *vp;
1724 register struct volHeader *hd = vp->header;
1727 if (programType == fileServer) {
1728 ReleaseVolumeHeader(hd);
1738 /***************************************************/
1739 /* Routines to add volume to hash chain, delete it */
1740 /***************************************************/
/*
 * Insert vp at the head of the VolumeHashTable chain selected by
 * VOLUME_HASH(hashid), remember hashid in vp->hashid, and give the volume a
 * vnode hash offset obtained from VolumeHashOffset_r().
 */
1742 static void AddVolumeToHashTable(vp, hashid)
1743 register Volume *vp;
1745 int hash = VOLUME_HASH(hashid);
1746 vp->hashid = hashid;
1747 vp->hashNext = VolumeHashTable[hash];
1748 VolumeHashTable[hash] = vp;
1749 vp->vnodeHashOffset = VolumeHashOffset_r();
/*
 * Unlink vp from its VolumeHashTable chain (bucket VOLUME_HASH(vp->hashid)).
 * If vp is the chain head, the bucket simply advances to vp->hashNext;
 * otherwise the chain is walked to the entry whose hashNext is vp and that
 * link is bypassed.  Running off the end of the chain without finding vp is
 * detected (tvp->hashNext == NULL); the action taken in that case is on a
 * line not shown here.
 */
1752 static void DeleteVolumeFromHashTable(vp)
1753 register Volume *vp;
1755 int hash = VOLUME_HASH(vp->hashid);
1756 if (VolumeHashTable[hash] == vp)
1757 VolumeHashTable[hash] = vp->hashNext;
1759 Volume *tvp = VolumeHashTable[hash];
1762 while (tvp->hashNext && tvp->hashNext != vp)
1763 tvp = tvp->hashNext;
1764 if (tvp->hashNext == NULL)
1766 tvp->hashNext = vp->hashNext;
/*
 * Log the large- and small-vnode cache counters taken from VnodeClassInfo
 * (cacheSize, allocs, gets, reads, writes), followed by the volume header
 * cache counters (VolumeCacheSize, VolumeGets, VolumeReplacements).
 * NOTE(review): the "Small vnode cache,%d" format lacks the space after the
 * comma that the "Large vnode cache, %d" message has.
 */
1771 void VPrintCacheStats_r(void)
1773 register struct VnodeClassInfo *vcp;
1774 vcp = &VnodeClassInfo[vLarge];
1775 Log("Large vnode cache, %d entries, %d allocs, %d gets (%d reads), %d writes\n",
1776 vcp->cacheSize, vcp->allocs, vcp->gets, vcp->reads, vcp->writes);
1777 vcp = &VnodeClassInfo[vSmall];
1778 Log("Small vnode cache,%d entries, %d allocs, %d gets (%d reads), %d writes\n",
1779 vcp->cacheSize, vcp->allocs, vcp->gets, vcp->reads, vcp->writes);
1780 Log("Volume header cache, %d entries, %d gets, %d replacements\n",
1781 VolumeCacheSize, VolumeGets, VolumeReplacements);
/*
 * Entry point that forwards to VPrintCacheStats_r().
 * NOTE(review): presumably brackets the call with the volume lock -- those
 * lines are not shown here; confirm.
 */
1784 void VPrintCacheStats(void)
1787 VPrintCacheStats_r();