/* * Copyright 2000, International Business Machines Corporation and others. * All Rights Reserved. * * This software has been released under the terms of the IBM Public * License. For details, see the LICENSE file in the top-level source * directory or online at http://www.openafs.org/dl/license10.html */ /* 1/1/89: NB: this stuff is all going to be replaced. Don't take it too seriously */ /* System: VICE-TWO Module: volume.c Institution: The Information Technology Center, Carnegie-Mellon University */ #include #include RCSID ("$Header$"); #include #include #include #ifndef AFS_NT40_ENV #include #if !defined(AFS_SGI_ENV) #ifdef AFS_OSF_ENV #include #else /* AFS_OSF_ENV */ #ifdef AFS_VFSINCL_ENV #define VFS #ifdef AFS_SUN5_ENV #include #else #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV) #include #include #else #include #endif #endif #else /* AFS_VFSINCL_ENV */ #if !defined(AFS_AIX_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) #include #endif #endif /* AFS_VFSINCL_ENV */ #endif /* AFS_OSF_ENV */ #endif /* AFS_SGI_ENV */ #endif /* AFS_NT40_ENV */ #include #include #include #ifdef AFS_NT40_ENV #include #else #include #endif #include #ifdef AFS_AIX_ENV #include #include #else #ifdef AFS_HPUX_ENV #include #include #else #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV) #ifdef AFS_SUN5_ENV #include #include #else #include #endif #else #ifndef AFS_NT40_ENV #if defined(AFS_SGI_ENV) #include #include #ifdef AFS_SGI_EFS_IOPS_ENV #define ROOTINO EFS_ROOTINO #include #include "sgiefs/efs.h" /* until 5.1 release */ #endif #else #ifndef AFS_LINUX20_ENV #include /* Need to find in libc 5, present in libc 6 */ #endif #endif #endif /* AFS_SGI_ENV */ #endif #endif /* AFS_HPUX_ENV */ #endif #ifndef AFS_NT40_ENV #include #include #include #include #ifndef ITIMER_REAL #include #endif /* ITIMER_REAL */ #endif /* AFS_NT40_ENV */ #if defined(AFS_SUN5_ENV) || defined(AFS_NT40_ENV) || defined(AFS_LINUX20_ENV) #include #else #include #endif #include "nfs.h" #include 
#include "lock.h" #include "lwp.h" #include #include "ihandle.h" #include #ifdef AFS_NT40_ENV #include #endif #include "vnode.h" #include "volume.h" #include "partition.h" #ifdef AFS_PTHREAD_ENV #include #else /* AFS_PTHREAD_ENV */ #include "afs/assert.h" #endif /* AFS_PTHREAD_ENV */ #include "vutils.h" #include "fssync.h" #ifndef AFS_NT40_ENV #include #endif #ifdef O_LARGEFILE #define afs_stat stat64 #define afs_fstat fstat64 #define afs_open open64 #else /* !O_LARGEFILE */ #define afs_stat stat #define afs_fstat fstat #define afs_open open #endif /* !O_LARGEFILE */ #ifdef AFS_PTHREAD_ENV pthread_mutex_t vol_glock_mutex; pthread_mutex_t vol_attach_mutex; pthread_mutex_t vol_fsync_mutex; pthread_mutex_t vol_trans_mutex; pthread_cond_t vol_put_volume_cond; pthread_cond_t vol_sleep_cond; int vol_attach_threads = 1; #endif /* AFS_PTHREAD_ENV */ #ifdef AFS_OSF_ENV extern void *calloc(), *realloc(); #endif /*@printflike@*/ extern void Log(const char *format, ...); /* Forward declarations */ static Volume *attach2(Error * ec, char *path, register struct VolumeHeader *header, struct DiskPartition *partp, int isbusy); static void FreeVolume(Volume * vp); static void VScanUpdateList(void); static void InitLRU(int howMany); static int GetVolumeHeader(register Volume * vp); static void ReleaseVolumeHeader(register struct volHeader *hd); static void FreeVolumeHeader(register Volume * vp); static void AddVolumeToHashTable(register Volume * vp, int hashid); static void DeleteVolumeFromHashTable(register Volume * vp); static int VHold(Volume * vp); static int VHold_r(Volume * vp); static void GetBitmap(Error * ec, Volume * vp, VnodeClass class); static void GetVolumePath(Error * ec, VolId volumeId, char **partitionp, char **namep); static void VReleaseVolumeHandles_r(Volume * vp); static void VCloseVolumeHandles_r(Volume * vp); int LogLevel; /* Vice loglevel--not defined as extern so that it will be * defined when not linked with vice, XXXX */ ProgramType programType; /* The type 
of program using the package */ #define VOLUME_BITMAP_GROWSIZE 16 /* bytes, => 128vnodes */ /* Must be a multiple of 4 (1 word) !! */ #define VOLUME_HASH_TABLE_SIZE 128 /* Must be a power of 2!! */ #define VOLUME_HASH(volumeId) (volumeId&(VOLUME_HASH_TABLE_SIZE-1)) private Volume *VolumeHashTable[VOLUME_HASH_TABLE_SIZE]; #ifndef AFS_HAVE_FFS /* This macro is used where an ffs() call does not exist. Was in util/ffs.c */ ffs(x) { afs_int32 ffs_i; afs_int32 ffs_tmp = x; if (ffs_tmp == 0) return (-1); else for (ffs_i = 1;; ffs_i++) { if (ffs_tmp & 1) return (ffs_i); else ffs_tmp >>= 1; } } #endif /* !AFS_HAVE_FFS */ #ifdef AFS_PTHREAD_ENV #include "rx/rx_queue.h" typedef struct diskpartition_queue_t { struct rx_queue queue; struct DiskPartition * diskP; } diskpartition_queue_t; typedef struct vinitvolumepackage_thread_t { struct rx_queue queue; pthread_cond_t thread_done_cv; int n_threads_complete; } vinitvolumepackage_thread_t; static void * VInitVolumePackageThread(void * args); #endif /* AFS_PTHREAD_ENV */ struct Lock vol_listLock; /* Lock obtained when listing volumes: prevents a volume from being missed if the volume is attached during a list volumes */ extern struct Lock FSYNC_handler_lock; static int TimeZoneCorrection; /* Number of seconds west of GMT */ /* Common message used when the volume goes off line */ char *VSalvageMessage = "Files in this volume are currently unavailable; call operations"; int VInit; /* 0 - uninitialized, * 1 - initialized but not all volumes have been attached, * 2 - initialized and all volumes have been attached, * 3 - initialized, all volumes have been attached, and * VConnectFS() has completed. */ bit32 VolumeCacheCheck; /* Incremented everytime a volume goes on line-- * used to stamp volume headers and in-core * vnodes. 
When the volume goes on-line the * vnode will be invalidated * access only with VOL_LOCK held */ int VolumeCacheSize = 200, VolumeGets = 0, VolumeReplacements = 0, Vlooks = 0; int VInitVolumePackage(ProgramType pt, int nLargeVnodes, int nSmallVnodes, int connect, int volcache) { int errors = 0; /* Number of errors while finding vice partitions. */ struct timeval tv; struct timezone tz; programType = pt; #ifdef AFS_PTHREAD_ENV assert(pthread_mutex_init(&vol_glock_mutex, NULL) == 0); assert(pthread_mutex_init(&vol_attach_mutex, NULL) == 0); assert(pthread_mutex_init(&vol_fsync_mutex, NULL) == 0); assert(pthread_mutex_init(&vol_trans_mutex, NULL) == 0); assert(pthread_cond_init(&vol_put_volume_cond, NULL) == 0); assert(pthread_cond_init(&vol_sleep_cond, NULL) == 0); #else /* AFS_PTHREAD_ENV */ IOMGR_Initialize(); #endif /* AFS_PTHREAD_ENV */ Lock_Init(&vol_listLock); Lock_Init(&FSYNC_handler_lock); srandom(time(0)); /* For VGetVolumeInfo */ gettimeofday(&tv, &tz); TimeZoneCorrection = tz.tz_minuteswest * 60; /* Ok, we have done enough initialization that fileserver can * start accepting calls, even though the volumes may not be * available just yet. 
*/ VInit = 1; if (programType == fileServer) { /* File server or "stand" */ FSYNC_fsInit(); } if (volcache > VolumeCacheSize) VolumeCacheSize = volcache; InitLRU(VolumeCacheSize); VInitVnodes(vLarge, nLargeVnodes); VInitVnodes(vSmall, nSmallVnodes); errors = VAttachPartitions(); if (errors) return -1; if (programType == fileServer) { struct DiskPartition *diskP; #ifdef AFS_PTHREAD_ENV struct vinitvolumepackage_thread_t params; struct diskpartition_queue_t * dpq; int i, len; pthread_t tid; pthread_attr_t attrs; assert(pthread_cond_init(¶ms.thread_done_cv,NULL) == 0); queue_Init(¶ms); params.n_threads_complete = 0; /* create partition work queue */ for (len=0, diskP = DiskPartitionList; diskP; diskP = diskP->next, len++) { dpq = (diskpartition_queue_t *) malloc(sizeof(struct diskpartition_queue_t)); assert(dpq != NULL); dpq->diskP = diskP; queue_Prepend(¶ms,dpq); } assert(pthread_attr_init(&attrs) == 0); assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0); len = MIN(len, vol_attach_threads); VOL_LOCK; for (i=0; i < len; i++) { assert(pthread_create (&tid, &attrs, &VInitVolumePackageThread, ¶ms) == 0); } while(params.n_threads_complete < len) { pthread_cond_wait(¶ms.thread_done_cv,&vol_glock_mutex); } VOL_UNLOCK; assert(pthread_cond_destroy(¶ms.thread_done_cv) == 0); #else /* AFS_PTHREAD_ENV */ DIR *dirp; struct dirent *dp; /* Attach all the volumes in this partition */ for (diskP = DiskPartitionList; diskP; diskP = diskP->next) { int nAttached = 0, nUnattached = 0; Log("Partition %s: attaching volumes\n", diskP->name); dirp = opendir(VPartitionPath(diskP)); assert(dirp); while ((dp = readdir(dirp))) { char *p; p = strrchr(dp->d_name, '.'); if (p != NULL && strcmp(p, VHDREXT) == 0) { Error error; Volume *vp; vp = VAttachVolumeByName(&error, diskP->name, dp->d_name, V_VOLUPD); (*(vp ? 
&nAttached : &nUnattached))++; if (error == VOFFLINE) Log("Volume %d stays offline (/vice/offline/%s exists)\n", VolumeNumber(dp->d_name), dp->d_name); else if (LogLevel >= 5) { Log("Partition %s: attached volume %d (%s)\n", diskP->name, VolumeNumber(dp->d_name), dp->d_name); } if (vp) { VPutVolume(vp); } } } Log("Partition %s: attached %d volumes; %d volumes not attached\n", diskP->name, nAttached, nUnattached); closedir(dirp); } #endif /* AFS_PTHREAD_ENV */ } VInit = 2; /* Initialized, and all volumes have been attached */ if (programType == volumeUtility && connect) { if (!VConnectFS()) { Log("Unable to connect to file server; aborted\n"); Lock_Destroy(&FSYNC_handler_lock); exit(1); } } return 0; } #ifdef AFS_PTHREAD_ENV static void * VInitVolumePackageThread(void * args) { int errors = 0; /* Number of errors while finding vice partitions. */ DIR *dirp; struct dirent *dp; struct DiskPartition *diskP; struct vinitvolumepackage_thread_t * params; struct diskpartition_queue_t * dpq; params = (vinitvolumepackage_thread_t *) args; VOL_LOCK; /* Attach all the volumes in this partition */ while (queue_IsNotEmpty(params)) { int nAttached = 0, nUnattached = 0; dpq = queue_First(params,diskpartition_queue_t); queue_Remove(dpq); VOL_UNLOCK; diskP = dpq->diskP; free(dpq); Log("Partition %s: attaching volumes\n", diskP->name); dirp = opendir(VPartitionPath(diskP)); assert(dirp); while ((dp = readdir(dirp))) { char *p; p = strrchr(dp->d_name, '.'); if (p != NULL && strcmp(p, VHDREXT) == 0) { Error error; Volume *vp; vp = VAttachVolumeByName(&error, diskP->name, dp->d_name, V_VOLUPD); (*(vp ? 
&nAttached : &nUnattached))++; if (error == VOFFLINE) Log("Volume %d stays offline (/vice/offline/%s exists)\n", VolumeNumber(dp->d_name), dp->d_name); else if (LogLevel >= 5) { Log("Partition %s: attached volume %d (%s)\n", diskP->name, VolumeNumber(dp->d_name), dp->d_name); } if (vp) { VPutVolume(vp); } } } Log("Partition %s: attached %d volumes; %d volumes not attached\n", diskP->name, nAttached, nUnattached); closedir(dirp); VOL_LOCK; } params->n_threads_complete++; pthread_cond_signal(¶ms->thread_done_cv); VOL_UNLOCK; return NULL; } #endif /* AFS_PTHREAD_ENV */ /* This must be called by any volume utility which needs to run while the file server is also running. This is separated from VInitVolumePackage so that a utility can fork--and each of the children can independently initialize communication with the file server */ int VConnectFS(void) { int retVal; VOL_LOCK; retVal = VConnectFS_r(); VOL_UNLOCK; return retVal; } int VConnectFS_r(void) { int rc; assert(VInit == 2 && programType == volumeUtility); rc = FSYNC_clientInit(); if (rc) VInit = 3; return rc; } void VDisconnectFS_r(void) { assert(programType == volumeUtility); FSYNC_clientFinis(); VInit = 2; } void VDisconnectFS(void) { VOL_LOCK; VDisconnectFS_r(); VOL_UNLOCK; } void VShutdown_r(void) { int i; register Volume *vp, *np; register afs_int32 code; Log("VShutdown: shutting down on-line volumes...\n"); for (i = 0; i < VOLUME_HASH_TABLE_SIZE; i++) { /* try to hold first volume in the hash table */ for (vp = VolumeHashTable[i]; vp; vp = vp->hashNext) { code = VHold_r(vp); if (code == 0) break; /* got it */ /* otherwise we go around again, trying another volume */ } while (vp) { if (LogLevel >= 5) Log("VShutdown: Attempting to take volume %u offline.\n", vp->hashid); /* first compute np before releasing vp, in case vp disappears * after releasing. Hold it, so it doesn't disapear. If we * can't hold it, try the next one in the chain. 
Invariant * at the top of this loop is that vp is held (has extra ref count). */ for (np = vp->hashNext; np; np = np->hashNext) { code = VHold_r(np); if (code == 0) break; /* got it */ } /* next, take the volume offline (drops reference count) */ VOffline_r(vp, "File server was shut down"); vp = np; /* next guy to try */ } } Log("VShutdown: complete.\n"); } void VShutdown(void) { VOL_LOCK; VShutdown_r(); VOL_UNLOCK; } static void ReadHeader(Error * ec, IHandle_t * h, char *to, int size, bit32 magic, bit32 version) { struct versionStamp *vsn; FdHandle_t *fdP; *ec = 0; if (h == NULL) { *ec = VSALVAGE; return; } fdP = IH_OPEN(h); if (fdP == NULL) { *ec = VSALVAGE; return; } if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) { *ec = VSALVAGE; FDH_REALLYCLOSE(fdP); return; } vsn = (struct versionStamp *)to; if (FDH_READ(fdP, to, size) != size || vsn->magic != magic) { *ec = VSALVAGE; FDH_REALLYCLOSE(fdP); return; } FDH_CLOSE(fdP); /* Check is conditional, in case caller wants to inspect version himself */ if (version && vsn->version != version) { *ec = VSALVAGE; } } /* VolumeHeaderToDisk * Allows for storing 64 bit inode numbers in on-disk volume header * file. 
*/ void VolumeHeaderToDisk(VolumeDiskHeader_t * dh, VolumeHeader_t * h) { memset((char *)dh, 0, sizeof(VolumeDiskHeader_t)); dh->stamp = h->stamp; dh->id = h->id; dh->parent = h->parent; #ifdef AFS_64BIT_IOPS_ENV dh->volumeInfo_lo = (afs_int32) h->volumeInfo & 0xffffffff; dh->volumeInfo_hi = (afs_int32) (h->volumeInfo >> 32) & 0xffffffff; dh->smallVnodeIndex_lo = (afs_int32) h->smallVnodeIndex & 0xffffffff; dh->smallVnodeIndex_hi = (afs_int32) (h->smallVnodeIndex >> 32) & 0xffffffff; dh->largeVnodeIndex_lo = (afs_int32) h->largeVnodeIndex & 0xffffffff; dh->largeVnodeIndex_hi = (afs_int32) (h->largeVnodeIndex >> 32) & 0xffffffff; dh->linkTable_lo = (afs_int32) h->linkTable & 0xffffffff; dh->linkTable_hi = (afs_int32) (h->linkTable >> 32) & 0xffffffff; #else dh->volumeInfo_lo = h->volumeInfo; dh->smallVnodeIndex_lo = h->smallVnodeIndex; dh->largeVnodeIndex_lo = h->largeVnodeIndex; dh->linkTable_lo = h->linkTable; #endif } /* DiskToVolumeHeader * Reads volume header file from disk, convering 64 bit inodes * if required. Makes the assumption that AFS has *always* * zero'd the volume header file so that high parts of inode * numbers are 0 in older (SGI EFS) volume header files. 
*/ void DiskToVolumeHeader(VolumeHeader_t * h, VolumeDiskHeader_t * dh) { memset((char *)h, 0, sizeof(VolumeHeader_t)); h->stamp = dh->stamp; h->id = dh->id; h->parent = dh->parent; #ifdef AFS_64BIT_IOPS_ENV h->volumeInfo = (Inode) dh->volumeInfo_lo | ((Inode) dh->volumeInfo_hi << 32); h->smallVnodeIndex = (Inode) dh->smallVnodeIndex_lo | ((Inode) dh-> smallVnodeIndex_hi << 32); h->largeVnodeIndex = (Inode) dh->largeVnodeIndex_lo | ((Inode) dh-> largeVnodeIndex_hi << 32); h->linkTable = (Inode) dh->linkTable_lo | ((Inode) dh->linkTable_hi << 32); #else h->volumeInfo = dh->volumeInfo_lo; h->smallVnodeIndex = dh->smallVnodeIndex_lo; h->largeVnodeIndex = dh->largeVnodeIndex_lo; h->linkTable = dh->linkTable_lo; #endif } void WriteVolumeHeader_r(ec, vp) Error *ec; Volume *vp; { IHandle_t *h = V_diskDataHandle(vp); FdHandle_t *fdP; *ec = 0; fdP = IH_OPEN(h); if (fdP == NULL) { *ec = VSALVAGE; return; } if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) { *ec = VSALVAGE; FDH_REALLYCLOSE(fdP); return; } if (FDH_WRITE(fdP, (char *)&V_disk(vp), sizeof(V_disk(vp))) != sizeof(V_disk(vp))) { *ec = VSALVAGE; FDH_REALLYCLOSE(fdP); return; } FDH_CLOSE(fdP); } /* Attach an existing volume, given its pathname, and return a pointer to the volume header information. The volume also normally goes online at this time. 
An offline volume must be reattached to make it go online */ Volume * VAttachVolumeByName(Error * ec, char *partition, char *name, int mode) { Volume *retVal; VATTACH_LOCK; VOL_LOCK; retVal = VAttachVolumeByName_r(ec, partition, name, mode); VOL_UNLOCK; VATTACH_UNLOCK; return retVal; } Volume * VAttachVolumeByName_r(Error * ec, char *partition, char *name, int mode) { register Volume *vp; int fd, n; struct afs_stat status; struct VolumeDiskHeader diskHeader; struct VolumeHeader iheader; struct DiskPartition *partp; char path[64]; int isbusy = 0; *ec = 0; if (programType == volumeUtility) { assert(VInit == 3); VLockPartition_r(partition); } if (programType == fileServer) { vp = VGetVolume_r(ec, VolumeNumber(name)); if (vp) { if (V_inUse(vp)) return vp; if (vp->specialStatus == VBUSY) isbusy = 1; VDetachVolume_r(ec, vp); if (*ec) { Log("VAttachVolume: Error detaching volume (%s)\n", name); } } } if (!(partp = VGetPartition_r(partition, 0))) { *ec = VNOVOL; Log("VAttachVolume: Error getting partition (%s)\n", partition); goto done; } *ec = 0; strcpy(path, VPartitionPath(partp)); strcat(path, "/"); strcat(path, name); VOL_UNLOCK; if ((fd = afs_open(path, O_RDONLY)) == -1 || afs_fstat(fd, &status) == -1) { Log("VAttachVolume: Failed to open %s (errno %d)\n", path, errno); if (fd > -1) close(fd); VOL_LOCK; *ec = VNOVOL; goto done; } n = read(fd, &diskHeader, sizeof(diskHeader)); close(fd); VOL_LOCK; if (n != sizeof(diskHeader) || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) { Log("VAttachVolume: Error reading volume header %s\n", path); *ec = VSALVAGE; goto done; } if (diskHeader.stamp.version != VOLUMEHEADERVERSION) { Log("VAttachVolume: Volume %s, version number is incorrect; volume needs salvaged\n", path); *ec = VSALVAGE; goto done; } DiskToVolumeHeader(&iheader, &diskHeader); if (programType == volumeUtility && mode != V_SECRETLY && mode != V_PEEK) { if (FSYNC_askfs(iheader.id, partition, FSYNC_NEEDVOLUME, mode) == FSYNC_DENIED) { Log("VAttachVolume: attach of 
volume %u apparently denied by file server\n", iheader.id); *ec = VNOVOL; /* XXXX */ goto done; } } vp = attach2(ec, path, &iheader, partp, isbusy); if (programType == volumeUtility && vp) { /* duplicate computation in fssync.c about whether the server * takes the volume offline or not. If the volume isn't * offline, we must not return it when we detach the volume, * or the server will abort */ if (mode == V_READONLY || mode == V_PEEK || (!VolumeWriteable(vp) && (mode == V_CLONE || mode == V_DUMP))) vp->needsPutBack = 0; else vp->needsPutBack = 1; } /* OK, there's a problem here, but one that I don't know how to * fix right now, and that I don't think should arise often. * Basically, we should only put back this volume to the server if * it was given to us by the server, but since we don't have a vp, * we can't run the VolumeWriteable function to find out as we do * above when computing vp->needsPutBack. So we send it back, but * there's a path in VAttachVolume on the server which may abort * if this volume doesn't have a header. Should be pretty rare * for all of that to happen, but if it does, probably the right * fix is for the server to allow the return of readonly volumes * that it doesn't think are really checked out. */ if (programType == volumeUtility && vp == NULL && mode != V_SECRETLY && mode != V_PEEK) { FSYNC_askfs(iheader.id, partition, FSYNC_ON, 0); } else if (programType == fileServer && vp) { V_needsCallback(vp) = 0; #ifdef notdef if (VInit >= 2 && V_BreakVolumeCallbacks) { Log("VAttachVolume: Volume %u was changed externally; breaking callbacks\n", V_id(vp)); (*V_BreakVolumeCallbacks) (V_id(vp)); } #endif VUpdateVolume_r(ec, vp); if (*ec) { Log("VAttachVolume: Error updating volume\n"); if (vp) VPutVolume_r(vp); goto done; } if (VolumeWriteable(vp) && V_dontSalvage(vp) == 0) { /* This is a hack: by temporarily settint the incore * dontSalvage flag ON, the volume will be put back on the * Update list (with dontSalvage OFF again). 
It will then * come back in N minutes with DONT_SALVAGE eventually * set. This is the way that volumes that have never had * it set get it set; or that volumes that have been * offline without DONT SALVAGE having been set also * eventually get it set */ V_dontSalvage(vp) = DONT_SALVAGE; VAddToVolumeUpdateList_r(ec, vp); if (*ec) { Log("VAttachVolume: Error adding volume to update list\n"); if (vp) VPutVolume_r(vp); goto done; } } if (LogLevel) Log("VOnline: volume %u (%s) attached and online\n", V_id(vp), V_name(vp)); } done: if (programType == volumeUtility) { VUnlockPartition_r(partition); } if (*ec) return NULL; else return vp; } private Volume * attach2(Error * ec, char *path, register struct VolumeHeader * header, struct DiskPartition * partp, int isbusy) { register Volume *vp; VOL_UNLOCK; vp = (Volume *) calloc(1, sizeof(Volume)); assert(vp != NULL); vp->specialStatus = (byte) (isbusy ? VBUSY : 0); vp->device = partp->device; vp->partition = partp; IH_INIT(vp->vnodeIndex[vLarge].handle, partp->device, header->parent, header->largeVnodeIndex); IH_INIT(vp->vnodeIndex[vSmall].handle, partp->device, header->parent, header->smallVnodeIndex); IH_INIT(vp->diskDataHandle, partp->device, header->parent, header->volumeInfo); IH_INIT(vp->linkHandle, partp->device, header->parent, header->linkTable); vp->shuttingDown = 0; vp->goingOffline = 0; vp->nUsers = 1; VOL_LOCK; vp->cacheCheck = ++VolumeCacheCheck; /* just in case this ever rolls over */ if (!vp->cacheCheck) vp->cacheCheck = ++VolumeCacheCheck; GetVolumeHeader(vp); VOL_UNLOCK; (void)ReadHeader(ec, V_diskDataHandle(vp), (char *)&V_disk(vp), sizeof(V_disk(vp)), VOLUMEINFOMAGIC, VOLUMEINFOVERSION); VOL_LOCK; if (*ec) { Log("VAttachVolume: Error reading diskDataHandle vol header %s; error=%u\n", path, *ec); } if (!*ec) { struct IndexFileHeader iHead; #if OPENAFS_VOL_STATS /* * We just read in the diskstuff part of the header. 
If the detailed * volume stats area has not yet been initialized, we should bzero the * area and mark it as initialized. */ if (!(V_stat_initialized(vp))) { memset((char *)(V_stat_area(vp)), 0, VOL_STATS_BYTES); V_stat_initialized(vp) = 1; } #endif /* OPENAFS_VOL_STATS */ VOL_UNLOCK; (void)ReadHeader(ec, vp->vnodeIndex[vSmall].handle, (char *)&iHead, sizeof(iHead), SMALLINDEXMAGIC, SMALLINDEXVERSION); VOL_LOCK; if (*ec) { Log("VAttachVolume: Error reading smallVnode vol header %s; error=%u\n", path, *ec); } } if (!*ec) { struct IndexFileHeader iHead; VOL_UNLOCK; (void)ReadHeader(ec, vp->vnodeIndex[vLarge].handle, (char *)&iHead, sizeof(iHead), LARGEINDEXMAGIC, LARGEINDEXVERSION); VOL_LOCK; if (*ec) { Log("VAttachVolume: Error reading largeVnode vol header %s; error=%u\n", path, *ec); } } #ifdef AFS_NAMEI_ENV if (!*ec) { struct versionStamp stamp; VOL_UNLOCK; (void)ReadHeader(ec, V_linkHandle(vp), (char *)&stamp, sizeof(stamp), LINKTABLEMAGIC, LINKTABLEVERSION); VOL_LOCK; if (*ec) { Log("VAttachVolume: Error reading namei vol header %s; error=%u\n", path, *ec); } } #endif if (*ec) { Log("VAttachVolume: Error attaching volume %s; volume needs salvage; error=%u\n", path, *ec); FreeVolume(vp); return NULL; } if (V_needsSalvaged(vp)) { if (vp->specialStatus) vp->specialStatus = 0; Log("VAttachVolume: volume salvage flag is ON for %s; volume needs salvage\n", path); *ec = VSALVAGE; FreeVolume(vp); return NULL; } if (programType == fileServer) { #ifndef FAST_RESTART if (V_inUse(vp) && VolumeWriteable(vp)) { if (!V_needsSalvaged(vp)) { V_needsSalvaged(vp) = 1; VUpdateVolume_r(ec, vp); } FreeVolume(vp); Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path); *ec = VSALVAGE; return NULL; } #endif /* FAST_RESTART */ if (V_destroyMe(vp) == DESTROY_ME) { FreeVolume(vp); Log("VAttachVolume: volume %s is junk; it should be destroyed at next salvage\n", path); *ec = VNOVOL; return NULL; } } AddVolumeToHashTable(vp, V_id(vp)); vp->nextVnodeUnique = 
V_uniquifier(vp); vp->vnodeIndex[vSmall].bitmap = vp->vnodeIndex[vLarge].bitmap = NULL; #ifndef BITMAP_LATER if (programType == fileServer && VolumeWriteable(vp)) { int i; for (i = 0; i < nVNODECLASSES; i++) { VOL_UNLOCK; GetBitmap(ec, vp, i); VOL_LOCK; if (*ec) { FreeVolume(vp); Log("VAttachVolume: error getting bitmap for volume (%s)\n", path); return NULL; } } } #endif /* BITMAP_LATER */ if (programType == fileServer) { if (vp->specialStatus) vp->specialStatus = 0; if (V_blessed(vp) && V_inService(vp) && !V_needsSalvaged(vp)) { V_inUse(vp) = 1; V_offlineMessage(vp)[0] = '\0'; } } return vp; } /* Attach an existing volume. The volume also normally goes online at this time. An offline volume must be reattached to make it go online. */ Volume * VAttachVolume(Error * ec, VolumeId volumeId, int mode) { Volume *retVal; VATTACH_LOCK; VOL_LOCK; retVal = VAttachVolume_r(ec, volumeId, mode); VOL_UNLOCK; VATTACH_UNLOCK; return retVal; } Volume * VAttachVolume_r(Error * ec, VolumeId volumeId, int mode) { char *part, *name; GetVolumePath(ec, volumeId, &part, &name); if (*ec) { register Volume *vp; Error error; vp = VGetVolume_r(&error, volumeId); if (vp) { assert(V_inUse(vp) == 0); VDetachVolume_r(ec, vp); } return NULL; } return VAttachVolumeByName_r(ec, part, name, mode); } /* Increment a reference count to a volume, sans context swaps. Requires * possibly reading the volume header in from the disk, since there's * an invariant in the volume package that nUsers>0 ==> vp->header is valid. * * N.B. This call can fail if we can't read in the header!! In this case * we still guarantee we won't context swap, but the ref count won't be * incremented (otherwise we'd violate the invariant). 
*/ static int VHold_r(register Volume * vp) { Error error; if (vp->nUsers == 0 && !GetVolumeHeader(vp)) { VolumeReplacements++; ReadHeader(&error, V_diskDataHandle(vp), (char *)&V_disk(vp), sizeof(V_disk(vp)), VOLUMEINFOMAGIC, VOLUMEINFOVERSION); if (error) return error; } vp->nUsers++; return 0; } static int VHold(register Volume * vp) { int retVal; VOL_LOCK; retVal = VHold_r(vp); VOL_UNLOCK; return retVal; } void VTakeOffline_r(register Volume * vp) { assert(vp->nUsers > 0); assert(programType == fileServer); vp->goingOffline = 1; V_needsSalvaged(vp) = 1; } void VTakeOffline(register Volume * vp) { VOL_LOCK; VTakeOffline_r(vp); VOL_UNLOCK; } void VPutVolume_r(register Volume * vp) { assert(--vp->nUsers >= 0); if (vp->nUsers == 0) { ReleaseVolumeHeader(vp->header); if (vp->goingOffline) { Error error; assert(programType == fileServer); vp->goingOffline = 0; V_inUse(vp) = 0; VUpdateVolume_r(&error, vp); VCloseVolumeHandles_r(vp); if (LogLevel) { Log("VOffline: Volume %u (%s) is now offline", V_id(vp), V_name(vp)); if (V_offlineMessage(vp)[0]) Log(" (%s)", V_offlineMessage(vp)); Log("\n"); } #ifdef AFS_PTHREAD_ENV assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0); #else /* AFS_PTHREAD_ENV */ LWP_NoYieldSignal(VPutVolume); #endif /* AFS_PTHREAD_ENV */ } if (vp->shuttingDown) { VReleaseVolumeHandles_r(vp); FreeVolume(vp); if (programType == fileServer) #ifdef AFS_PTHREAD_ENV assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0); #else /* AFS_PTHREAD_ENV */ LWP_NoYieldSignal(VPutVolume); #endif /* AFS_PTHREAD_ENV */ } } } void VPutVolume(register Volume * vp) { VOL_LOCK; VPutVolume_r(vp); VOL_UNLOCK; } /* Get a pointer to an attached volume. The pointer is returned regardless of whether or not the volume is in service or on/off line. 
An error code, however, is returned with an indication of the volume's status */
/* Locked wrapper: runs VGetVolume_r() with VOL_LOCK held. */
Volume *
VGetVolume(Error * ec, VolId volumeId)
{
    Volume *retVal;
    VOL_LOCK;
    retVal = VGetVolume_r(ec, volumeId);
    VOL_UNLOCK;
    return retVal;
}

/*
 * Look volumeId up in VolumeHashTable and return it with nUsers bumped.
 *
 * Outcomes:
 *   - not in the table: NULL; *ec is VOFFLINE while VInit < 2, else VNOVOL
 *   - header had to be re-read and the read failed: the volume is freed,
 *     NULL is returned and *ec is the ReadHeader() error
 *   - volume is shutting down: NULL, *ec = VNOVOL
 *   - file server and volume going offline: wait on the put-volume
 *     condition/event, then redo the whole lookup
 *   - file server otherwise: the volume IS returned, with *ec set to
 *     specialStatus, VNOVOL (not in service or not blessed), VOFFLINE
 *     (not in use), or 0
 *
 * On return either the pointer is non-NULL or *ec is non-zero.
 */
Volume *
VGetVolume_r(Error * ec, VolId volumeId)
{
    Volume *vp;
    /* V0..V15 are only incremented below and never read in this function;
     * presumably path markers for inspection in a debugger. */
    unsigned short V0 = 0, V1 = 0, V2 = 0, V3 = 0, V4 = 0, V5 = 0, V6 =
        0, V7 = 0, V8 = 0, V9 = 0;
    unsigned short V10 = 0, V11 = 0, V12 = 0, V13 = 0, V14 = 0, V15 = 0;

    for (;;) {
        *ec = 0;
        V0++;
        /* walk the hash chain; Vlooks counts every entry stepped over */
        for (vp = VolumeHashTable[VOLUME_HASH(volumeId)];
             vp && vp->hashid != volumeId; vp = vp->hashNext)
            Vlooks++;

        if (!vp) {
            V1++;
            if (VInit < 2) {
                V2++;
                /* Until we have reached an initialization level of 2
                 * we don't know whether this volume exists or not.
                 * We can't sleep and retry later because before a volume
                 * is attached, the caller tries to get it first.  Just
                 * return VOFFLINE and the caller can choose whether to
                 * retry the command or not. */
                *ec = VOFFLINE;
                break;
            }

            *ec = VNOVOL;
            break;
        }

        V3++;
        VolumeGets++;
        /* No current users: make sure a header is attached, re-reading it
         * from disk when GetVolumeHeader() returns 0. */
        if (vp->nUsers == 0 && !GetVolumeHeader(vp)) {
            V5++;
            VolumeReplacements++;
            ReadHeader(ec, V_diskDataHandle(vp), (char *)&V_disk(vp),
                       sizeof(V_disk(vp)), VOLUMEINFOMAGIC,
                       VOLUMEINFOVERSION);
            if (*ec) {
                V6++;
                /* Only log the error if it was a totally unexpected error.  Simply
                 * a missing inode is likely to be caused by the volume being deleted */
                if (errno != ENXIO || LogLevel)
                    Log("Volume %u: couldn't reread volume header\n",
                        vp->hashid);
                FreeVolume(vp);
                vp = NULL;
                break;
            }
        }
        V7++;
        if (vp->shuttingDown) {
            V8++;
            *ec = VNOVOL;
            vp = NULL;
            break;
        }
        if (programType == fileServer) {
            V9++;
            if (vp->goingOffline) {
                V10++;
                /* wait for a VPutVolume wake-up, then look the volume up
                 * again from scratch */
#ifdef AFS_PTHREAD_ENV
                pthread_cond_wait(&vol_put_volume_cond, &vol_glock_mutex);
#else /* AFS_PTHREAD_ENV */
                LWP_WaitProcess(VPutVolume);
#endif /* AFS_PTHREAD_ENV */
                continue;
            }
            if (vp->specialStatus) {
                V11++;
                *ec = vp->specialStatus;
            } else if (V_inService(vp) == 0 || V_blessed(vp) == 0) {
                V12++;
                *ec = VNOVOL;
            } else if (V_inUse(vp) == 0) {
                V13++;
                *ec = VOFFLINE;
            } else {
                V14++;
            }
        }
        break;
    }
    V15++;
    /* bump nUsers whenever a volume is being returned; note this includes
     * the file-server cases above where *ec was also set */
    if (vp)
        vp->nUsers++;

    assert(vp || *ec);
    return vp;
}


/* For both VForceOffline and VOffline, we close all relevant handles.
 * For VOffline, if we re-attach the volume, the files may possible be
 * different than before.
 */
/* Flushes the volume's directory buffers and vnode files, syncs the
 * index and data handles (the link handle only on NT; skipped entirely
 * for a volume utility), then releases all four inode handles. */
static void
VReleaseVolumeHandles_r(Volume * vp)
{
    DFlushVolume(V_id(vp));
    VReleaseVnodeFiles_r(vp);

    /* Too time consuming and unnecessary for the volserver */
    if (programType != volumeUtility) {
        IH_CONDSYNC(vp->vnodeIndex[vLarge].handle);
        IH_CONDSYNC(vp->vnodeIndex[vSmall].handle);
        IH_CONDSYNC(vp->diskDataHandle);
#ifdef AFS_NT40_ENV
        IH_CONDSYNC(vp->linkHandle);
#endif /* AFS_NT40_ENV */
    }

    IH_RELEASE(vp->vnodeIndex[vLarge].handle);
    IH_RELEASE(vp->vnodeIndex[vSmall].handle);
    IH_RELEASE(vp->diskDataHandle);
    IH_RELEASE(vp->linkHandle);
}

/* Force the volume offline, set the salvage flag.  No further references to
 * the volume through the volume package will be honored.
*/ void VForceOffline_r(Volume * vp) { Error error; if (!V_inUse(vp)) return; strcpy(V_offlineMessage(vp), "Forced offline due to internal error: volume needs to be salvaged"); Log("Volume %u forced offline: it needs salvaging!\n", V_id(vp)); V_inUse(vp) = 0; vp->goingOffline = 0; V_needsSalvaged(vp) = 1; VUpdateVolume_r(&error, vp); #ifdef AFS_PTHREAD_ENV assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0); #else /* AFS_PTHREAD_ENV */ LWP_NoYieldSignal(VPutVolume); #endif /* AFS_PTHREAD_ENV */ VReleaseVolumeHandles_r(vp); } void VForceOffline(Volume * vp) { VOL_LOCK; VForceOffline_r(vp); VOL_UNLOCK; } /* The opposite of VAttachVolume. The volume header is written to disk, with the inUse bit turned off. A copy of the header is maintained in memory, however (which is why this is VOffline, not VDetach). */ void VOffline_r(Volume * vp, char *message) { Error error; VolumeId vid = V_id(vp); assert(programType != volumeUtility); if (!V_inUse(vp)) { VPutVolume_r(vp); return; } if (V_offlineMessage(vp)[0] == '\0') strncpy(V_offlineMessage(vp), message, sizeof(V_offlineMessage(vp))); V_offlineMessage(vp)[sizeof(V_offlineMessage(vp)) - 1] = '\0'; vp->goingOffline = 1; VPutVolume_r(vp); vp = VGetVolume_r(&error, vid); /* Wait for it to go offline */ if (vp) /* In case it was reattached... */ VPutVolume_r(vp); } void VOffline(Volume * vp, char *message) { VOL_LOCK; VOffline_r(vp, message); VOL_UNLOCK; } /* For VDetachVolume, we close all cached file descriptors, but keep * the Inode handles in case we need to read from a busy volume. 
*/ static void VCloseVolumeHandles_r(Volume * vp) { DFlushVolume(V_id(vp)); VCloseVnodeFiles_r(vp); /* Too time consuming and unnecessary for the volserver */ if (programType != volumeUtility) { IH_CONDSYNC(vp->vnodeIndex[vLarge].handle); IH_CONDSYNC(vp->vnodeIndex[vSmall].handle); IH_CONDSYNC(vp->diskDataHandle); #ifdef AFS_NT40_ENV IH_CONDSYNC(vp->linkHandle); #endif /* AFS_NT40_ENV */ } IH_REALLYCLOSE(vp->vnodeIndex[vLarge].handle); IH_REALLYCLOSE(vp->vnodeIndex[vSmall].handle); IH_REALLYCLOSE(vp->diskDataHandle); IH_REALLYCLOSE(vp->linkHandle); } /* This gets used for the most part by utility routines that don't want * to keep all the volume headers around. Generally, the file server won't * call this routine, because then the offline message in the volume header * (or other information) will still be available to clients. For NAMEI, also * close the file handles. */ void VDetachVolume_r(Error * ec, Volume * vp) { VolumeId volume; struct DiskPartition *tpartp; int notifyServer, useDone; *ec = 0; /* always "succeeds" */ if (programType == volumeUtility) { notifyServer = vp->needsPutBack; useDone = (V_destroyMe(vp) == DESTROY_ME); } tpartp = vp->partition; volume = V_id(vp); DeleteVolumeFromHashTable(vp); vp->shuttingDown = 1; VPutVolume_r(vp); /* Will be detached sometime in the future--this is OK since volume is offline */ if (programType == volumeUtility && notifyServer) { /* * Note: The server is not notified in the case of a bogus volume * explicitly to make it possible to create a volume, do a partial * restore, then abort the operation without ever putting the volume * online. This is essential in the case of a volume move operation * between two partitions on the same server. 
In that case, there * would be two instances of the same volume, one of them bogus, * which the file server would attempt to put on line */ if (useDone) /* don't put online */ FSYNC_askfs(volume, tpartp->name, FSYNC_DONE, 0); else { /* fs can use it again */ FSYNC_askfs(volume, tpartp->name, FSYNC_ON, 0); /* Dettaching it so break all callbacks on it */ if (V_BreakVolumeCallbacks) { Log("volume %u detached; breaking all call backs\n", volume); (*V_BreakVolumeCallbacks) (volume); } } } } void VDetachVolume(Error * ec, Volume * vp) { VOL_LOCK; VDetachVolume_r(ec, vp); VOL_UNLOCK; } VnodeId VAllocBitmapEntry_r(Error * ec, Volume * vp, register struct vnodeIndex *index) { register byte *bp, *ep; *ec = 0; /* This test is probably redundant */ if (!VolumeWriteable(vp)) { *ec = (bit32) VREADONLY; return 0; } #ifdef BITMAP_LATER if ((programType == fileServer) && !index->bitmap) { int i; int wasVBUSY = 0; if (vp->specialStatus == VBUSY) { if (vp->goingOffline) { /* vos dump waiting for the volume to * go offline. We probably come here * from AddNewReadableResidency */ wasVBUSY = 1; } else { VOL_UNLOCK; while (vp->specialStatus == VBUSY) #ifdef AFS_PTHREAD_ENV sleep(2); #else /* AFS_PTHREAD_ENV */ IOMGR_Sleep(2); #endif /* AFS_PTHREAD_ENV */ VOL_LOCK; } } if (!index->bitmap) { vp->specialStatus = VBUSY; /* Stop anyone else from using it. */ for (i = 0; i < nVNODECLASSES; i++) { VOL_UNLOCK; GetBitmap(ec, vp, i); VOL_LOCK; if (*ec) { vp->specialStatus = 0; vp->shuttingDown = 1; /* Let who has it free it. */ return NULL; } } if (!wasVBUSY) vp->specialStatus = 0; /* Allow others to have access. 
*/ } } #endif /* BITMAP_LATER */ bp = index->bitmap + index->bitmapOffset; ep = index->bitmap + index->bitmapSize; while (bp < ep) { if ((*(bit32 *) bp) != (bit32) 0xffffffff) { int o; index->bitmapOffset = (afs_uint32) (bp - index->bitmap); while (*bp == 0xff) bp++; o = ffs(~*bp) - 1; /* ffs is documented in BSTRING(3) */ *bp |= (1 << o); return (VnodeId) ((bp - index->bitmap) * 8 + o); } bp += sizeof(bit32) /* i.e. 4 */ ; } /* No bit map entry--must grow bitmap */ bp = (byte *) realloc(index->bitmap, index->bitmapSize + VOLUME_BITMAP_GROWSIZE); assert(bp != NULL); index->bitmap = bp; bp += index->bitmapSize; memset(bp, 0, VOLUME_BITMAP_GROWSIZE); index->bitmapOffset = index->bitmapSize; index->bitmapSize += VOLUME_BITMAP_GROWSIZE; *bp = 1; return index->bitmapOffset * 8; } VnodeId VAllocBitmapEntry(Error * ec, Volume * vp, register struct vnodeIndex * index) { VnodeId retVal; VOL_LOCK; retVal = VAllocBitmapEntry_r(ec, vp, index); VOL_UNLOCK; return retVal; } void VFreeBitMapEntry_r(Error * ec, register struct vnodeIndex *index, unsigned bitNumber) { unsigned int offset; *ec = 0; #ifdef BITMAP_LATER if (!index->bitmap) return; #endif /* BITMAP_LATER */ offset = bitNumber >> 3; if (offset >= index->bitmapSize) { *ec = VNOVNODE; return; } if (offset < index->bitmapOffset) index->bitmapOffset = offset & ~3; /* Truncate to nearest bit32 */ *(index->bitmap + offset) &= ~(1 << (bitNumber & 0x7)); } void VFreeBitMapEntry(Error * ec, register struct vnodeIndex *index, unsigned bitNumber) { VOL_LOCK; VFreeBitMapEntry_r(ec, index, bitNumber); VOL_UNLOCK; } void VUpdateVolume_r(Error * ec, Volume * vp) { *ec = 0; if (programType == fileServer) V_uniquifier(vp) = (V_inUse(vp) ? 
V_nextVnodeUnique(vp) + 200 : V_nextVnodeUnique(vp)); /*printf("Writing volume header for '%s'\n", V_name(vp)); */ WriteVolumeHeader_r(ec, vp); if (*ec) { Log("VUpdateVolume: error updating volume header, volume %u (%s)\n", V_id(vp), V_name(vp)); VForceOffline_r(vp); } } void VUpdateVolume(Error * ec, Volume * vp) { VOL_LOCK; VUpdateVolume_r(ec, vp); VOL_UNLOCK; } void VSyncVolume_r(Error * ec, Volume * vp) { FdHandle_t *fdP; VUpdateVolume_r(ec, vp); if (!ec) { int code; fdP = IH_OPEN(V_diskDataHandle(vp)); assert(fdP != NULL); code = FDH_SYNC(fdP); assert(code == 0); FDH_CLOSE(fdP); } } void VSyncVolume(Error * ec, Volume * vp) { VOL_LOCK; VSyncVolume_r(ec, vp); VOL_UNLOCK; } static void FreeVolume(Volume * vp) { int i; if (!vp) return; for (i = 0; i < nVNODECLASSES; i++) if (vp->vnodeIndex[i].bitmap) free(vp->vnodeIndex[i].bitmap); FreeVolumeHeader(vp); DeleteVolumeFromHashTable(vp); free(vp); } static void GetBitmap(Error * ec, Volume * vp, VnodeClass class) { StreamHandle_t *file; int nVnodes; int size; struct VnodeClassInfo *vcp = &VnodeClassInfo[class]; struct vnodeIndex *vip = &vp->vnodeIndex[class]; struct VnodeDiskObject *vnode; unsigned int unique = 0; FdHandle_t *fdP; #ifdef BITMAP_LATER byte *BitMap = 0; #endif /* BITMAP_LATER */ *ec = 0; fdP = IH_OPEN(vip->handle); assert(fdP != NULL); file = FDH_FDOPEN(fdP, "r"); assert(file != NULL); vnode = (VnodeDiskObject *) malloc(vcp->diskSize); assert(vnode != NULL); size = OS_SIZE(fdP->fd_fd); assert(size != -1); nVnodes = (size <= vcp->diskSize ? 
0 : size - vcp->diskSize) >> vcp->logSize; vip->bitmapSize = ((nVnodes / 8) + 10) / 4 * 4; /* The 10 is a little extra so * a few files can be created in this volume, * the whole thing is rounded up to nearest 4 * bytes, because the bit map allocator likes * it that way */ #ifdef BITMAP_LATER BitMap = (byte *) calloc(1, vip->bitmapSize); assert(BitMap != NULL); #else /* BITMAP_LATER */ vip->bitmap = (byte *) calloc(1, vip->bitmapSize); assert(vip->bitmap != NULL); vip->bitmapOffset = 0; #endif /* BITMAP_LATER */ if (STREAM_SEEK(file, vcp->diskSize, 0) != -1) { int bitNumber = 0; for (bitNumber = 0; bitNumber < nVnodes + 100; bitNumber++) { if (STREAM_READ(vnode, vcp->diskSize, 1, file) != 1) break; if (vnode->type != vNull) { if (vnode->vnodeMagic != vcp->magic) { Log("GetBitmap: addled vnode index in volume %s; volume needs salvage\n", V_name(vp)); *ec = VSALVAGE; break; } #ifdef BITMAP_LATER *(BitMap + (bitNumber >> 3)) |= (1 << (bitNumber & 0x7)); #else /* BITMAP_LATER */ *(vip->bitmap + (bitNumber >> 3)) |= (1 << (bitNumber & 0x7)); #endif /* BITMAP_LATER */ if (unique <= vnode->uniquifier) unique = vnode->uniquifier + 1; } #ifndef AFS_PTHREAD_ENV if ((bitNumber & 0x00ff) == 0x0ff) { /* every 256 iterations */ IOMGR_Poll(); } #endif /* !AFS_PTHREAD_ENV */ } } if (vp->nextVnodeUnique < unique) { Log("GetBitmap: bad volume uniquifier for volume %s; volume needs salvage\n", V_name(vp)); *ec = VSALVAGE; } /* Paranoia, partly justified--I think fclose after fdopen * doesn't seem to close fd. In any event, the documentation * doesn't specify, so it's safer to close it twice. */ STREAM_CLOSE(file); FDH_CLOSE(fdP); free(vnode); #ifdef BITMAP_LATER /* There may have been a racing condition with some other thread, both * creating the bitmaps for this volume. If the other thread was faster * the pointer to bitmap should already be filled and we can free ours. 
*/ if (vip->bitmap == NULL) { vip->bitmap = BitMap; vip->bitmapOffset = 0; } else free((byte *) BitMap); #endif /* BITMAP_LATER */ } static void GetVolumePath(Error * ec, VolId volumeId, char **partitionp, char **namep) { static char partition[VMAXPATHLEN], name[VMAXPATHLEN]; char path[VMAXPATHLEN]; int found = 0; struct DiskPartition *dp; *ec = 0; name[0] = '/'; (void)afs_snprintf(&name[1], (sizeof name) - 1, VFORMAT, volumeId); for (dp = DiskPartitionList; dp; dp = dp->next) { struct afs_stat status; strcpy(path, VPartitionPath(dp)); strcat(path, name); if (afs_stat(path, &status) == 0) { strcpy(partition, dp->name); found = 1; break; } } if (!found) { *ec = VNOVOL; *partitionp = *namep = NULL; } else { *partitionp = partition; *namep = name; } } int VolumeNumber(char *name) { if (*name == '/') name++; return atoi(name + 1); } char * VolumeExternalName(VolumeId volumeId) { static char name[VMAXPATHLEN]; (void)afs_snprintf(name, sizeof name, VFORMAT, volumeId); return name; } #if OPENAFS_VOL_STATS #define OneDay (86400) /* 24 hours' worth of seconds */ #else #define OneDay (24*60*60) /* 24 hours */ #endif /* OPENAFS_VOL_STATS */ #define Midnight(date) ((date-TimeZoneCorrection)/OneDay*OneDay+TimeZoneCorrection) /*------------------------------------------------------------------------ * [export] VAdjustVolumeStatistics * * Description: * If we've passed midnight, we need to update all the day use * statistics as well as zeroing the detailed volume statistics * (if we are implementing them). * * Arguments: * vp : Pointer to the volume structure describing the lucky * volume being considered for update. * * Returns: * 0 (always!) * * Environment: * Nothing interesting. * * Side Effects: * As described. 
*------------------------------------------------------------------------*/ int VAdjustVolumeStatistics_r(register Volume * vp) { unsigned int now = FT_ApproxTime(); if (now - V_dayUseDate(vp) > OneDay) { register ndays, i; ndays = (now - V_dayUseDate(vp)) / OneDay; for (i = 6; i > ndays - 1; i--) V_weekUse(vp)[i] = V_weekUse(vp)[i - ndays]; for (i = 0; i < ndays - 1 && i < 7; i++) V_weekUse(vp)[i] = 0; if (ndays <= 7) V_weekUse(vp)[ndays - 1] = V_dayUse(vp); V_dayUse(vp) = 0; V_dayUseDate(vp) = Midnight(now); #if OPENAFS_VOL_STATS /* * All we need to do is bzero the entire VOL_STATS_BYTES of * the detailed volume statistics area. */ memset((char *)(V_stat_area(vp)), 0, VOL_STATS_BYTES); #endif /* OPENAFS_VOL_STATS */ } /*It's been more than a day of collection */ /* * Always return happily. */ return (0); } /*VAdjustVolumeStatistics */ int VAdjustVolumeStatistics(register Volume * vp) { int retVal; VOL_LOCK; retVal = VAdjustVolumeStatistics_r(vp); VOL_UNLOCK; return retVal; } void VBumpVolumeUsage_r(register Volume * vp) { unsigned int now = FT_ApproxTime(); if (now - V_dayUseDate(vp) > OneDay) VAdjustVolumeStatistics_r(vp); /* * Save the volume header image to disk after every 128 bumps to dayUse. */ if ((V_dayUse(vp)++ & 127) == 0) { Error error; VUpdateVolume_r(&error, vp); } } void VBumpVolumeUsage(register Volume * vp) { VOL_LOCK; VBumpVolumeUsage_r(vp); VOL_UNLOCK; } void VSetDiskUsage_r(void) { static int FifteenMinuteCounter = 0; while (VInit < 2) { /* NOTE: Don't attempt to access the partitions list until the * initialization level indicates that all volumes are attached, * which implies that all partitions are initialized. 
*/ #ifdef AFS_PTHREAD_ENV sleep(10); #else /* AFS_PTHREAD_ENV */ IOMGR_Sleep(10); #endif /* AFS_PTHREAD_ENV */ } VResetDiskUsage_r(); if (++FifteenMinuteCounter == 3) { FifteenMinuteCounter = 0; VScanUpdateList(); } } void VSetDiskUsage(void) { VOL_LOCK; VSetDiskUsage_r(); VOL_UNLOCK; } /* The number of minutes that a volume hasn't been updated before the * "Dont salvage" flag in the volume header will be turned on */ #define SALVAGE_INTERVAL (10*60) static VolumeId *UpdateList; /* Pointer to array of Volume ID's */ static int nUpdatedVolumes; /* Updated with entry in UpdateList, salvage after crash flag on */ static int updateSize; /* number of entries possible */ #define UPDATE_LIST_SIZE 100 /* size increment */ void VAddToVolumeUpdateList_r(Error * ec, Volume * vp) { *ec = 0; vp->updateTime = FT_ApproxTime(); if (V_dontSalvage(vp) == 0) return; V_dontSalvage(vp) = 0; VSyncVolume_r(ec, vp); if (*ec) return; if (!UpdateList) { updateSize = UPDATE_LIST_SIZE; UpdateList = (VolumeId *) malloc(sizeof(VolumeId) * updateSize); } else { if (nUpdatedVolumes == updateSize) { updateSize += UPDATE_LIST_SIZE; UpdateList = (VolumeId *) realloc(UpdateList, sizeof(VolumeId) * updateSize); } } assert(UpdateList != NULL); UpdateList[nUpdatedVolumes++] = V_id(vp); } static void VScanUpdateList(void) { register int i, gap; register Volume *vp; Error error; afs_uint32 now = FT_ApproxTime(); /* Be careful with this code, since it works with interleaved calls to AddToVolumeUpdateList */ for (i = gap = 0; i < nUpdatedVolumes; i++) { vp = VGetVolume_r(&error, UpdateList[i - gap] = UpdateList[i]); if (error) { gap++; } else if (vp->nUsers == 1 && now - vp->updateTime > SALVAGE_INTERVAL) { V_dontSalvage(vp) = DONT_SALVAGE; VUpdateVolume_r(&error, vp); /* No need to fsync--not critical */ gap++; } if (vp) VPutVolume_r(vp); #ifndef AFS_PTHREAD_ENV IOMGR_Poll(); #endif /* !AFS_PTHREAD_ENV */ } nUpdatedVolumes -= gap; } /***************************************************/ /* Add on routines 
to manage a volume header cache */ /***************************************************/ static struct volHeader *volumeLRU; /* Allocate a bunch of headers; string them together */ static void InitLRU(int howMany) { register struct volHeader *hp; if (programType != fileServer) return; hp = (struct volHeader *)(calloc(howMany, sizeof(struct volHeader))); while (howMany--) ReleaseVolumeHeader(hp++); } /* Get a volume header from the LRU list; update the old one if necessary */ /* Returns 1 if there was already a header, which is removed from the LRU list */ static int GetVolumeHeader(register Volume * vp) { Error error; register struct volHeader *hd; int old; static int everLogged = 0; old = (vp->header != NULL); /* old == volume already has a header */ if (programType != fileServer) { if (!vp->header) { hd = (struct volHeader *)calloc(1, sizeof(*vp->header)); assert(hd != NULL); vp->header = hd; hd->back = vp; } } else { if (old) { hd = vp->header; if (volumeLRU == hd) volumeLRU = hd->next; assert(hd->back == vp); } else { if (volumeLRU) /* not currently in use and least recently used */ hd = volumeLRU->prev; else { hd = (struct volHeader *)calloc(1, sizeof(*vp->header)); /* make it look like single elt LRU */ hd->prev = hd->next = hd; if (!everLogged) { Log("****Allocated more volume headers, probably leak****\n"); everLogged = 1; } } if (hd->back) { if (hd->diskstuff.inUse) { WriteVolumeHeader_r(&error, hd->back); /* Ignore errors; catch them later */ } hd->back->header = 0; } hd->back = vp; vp->header = hd; } if (hd->next) { /* hd->next != 0 --> in LRU chain (we zero it later) */ hd->prev->next = hd->next; /* pull hd out of LRU list */ hd->next->prev = hd->prev; /* if hd only element, this is noop */ } hd->next = hd->prev = 0; /* if not in LRU chain, next test won't be true */ if (hd == volumeLRU) /* last header item, turn into empty list */ volumeLRU = NULL; } return old; } /* Put it at the top of the LRU chain */ static void ReleaseVolumeHeader(register struct 
volHeader *hd) { if (programType != fileServer) return; if (!hd || hd->next) /* no header, or header already released */ return; if (!volumeLRU) { hd->next = hd->prev = hd; } else { hd->prev = volumeLRU->prev; hd->next = volumeLRU; hd->prev->next = hd->next->prev = hd; } volumeLRU = hd; } static void FreeVolumeHeader(register Volume * vp) { register struct volHeader *hd = vp->header; if (!hd) return; if (programType == fileServer) { ReleaseVolumeHeader(hd); hd->back = 0; } else { free(hd); } vp->header = 0; } /***************************************************/ /* Routines to add volume to hash chain, delete it */ /***************************************************/ static void AddVolumeToHashTable(register Volume * vp, int hashid) { int hash = VOLUME_HASH(hashid); vp->hashid = hashid; vp->hashNext = VolumeHashTable[hash]; VolumeHashTable[hash] = vp; vp->vnodeHashOffset = VolumeHashOffset_r(); } static void DeleteVolumeFromHashTable(register Volume * vp) { int hash = VOLUME_HASH(vp->hashid); if (VolumeHashTable[hash] == vp) VolumeHashTable[hash] = vp->hashNext; else { Volume *tvp = VolumeHashTable[hash]; if (tvp == NULL) return; while (tvp->hashNext && tvp->hashNext != vp) tvp = tvp->hashNext; if (tvp->hashNext == NULL) return; tvp->hashNext = vp->hashNext; } vp->hashid = 0; } void VPrintCacheStats_r(void) { register struct VnodeClassInfo *vcp; vcp = &VnodeClassInfo[vLarge]; Log("Large vnode cache, %d entries, %d allocs, %d gets (%d reads), %d writes\n", vcp->cacheSize, vcp->allocs, vcp->gets, vcp->reads, vcp->writes); vcp = &VnodeClassInfo[vSmall]; Log("Small vnode cache,%d entries, %d allocs, %d gets (%d reads), %d writes\n", vcp->cacheSize, vcp->allocs, vcp->gets, vcp->reads, vcp->writes); Log("Volume header cache, %d entries, %d gets, %d replacements\n", VolumeCacheSize, VolumeGets, VolumeReplacements); } void VPrintCacheStats(void) { VOL_LOCK; VPrintCacheStats_r(); VOL_UNLOCK; }