#endif
-#define VOLUME_BITMAP_GROWSIZE 16 /* bytes, => 128vnodes */
- /* Must be a multiple of 4 (1 word) !! */
-
/* this parameter needs to be tunable at runtime.
* 128 was really inadequate for largish servers -- at 16384 volumes this
* puts average chain length at 128, thus an average 65 deref's to find a volptr.
opts->interrupt_rxcall = NULL;
opts->offline_timeout = -1;
opts->offline_shutdown_timeout = -1;
+ opts->usage_threshold = 128;
+ opts->usage_rate_limit = 5;
#ifdef FAST_RESTART
opts->unsafe_attach = 1;
/* create partition work queue */
for (parts=0, diskP = DiskPartitionList; diskP; diskP = diskP->next, parts++) {
- dpq = (diskpartition_queue_t *) malloc(sizeof(struct diskpartition_queue_t));
+ dpq = malloc(sizeof(struct diskpartition_queue_t));
osi_Assert(dpq != NULL);
dpq->diskP = diskP;
queue_Append(¶ms,dpq);
}
- threads = MIN(parts, vol_attach_threads);
+ threads = min(parts, vol_attach_threads);
if (threads > 1) {
/* spawn off a bunch of initialization threads */
MUTEX_INIT(&(pq.mutex), "partq", MUTEX_DEFAULT, 0);
for (parts = 0, diskP = DiskPartitionList; diskP; diskP = diskP->next, parts++) {
struct diskpartition_queue_t *dp;
- dp = (struct diskpartition_queue_t*)malloc(sizeof(struct diskpartition_queue_t));
+ dp = malloc(sizeof(struct diskpartition_queue_t));
osi_Assert(dp != NULL);
dp->diskP = diskP;
queue_Append(&pq, dp);
}
/* number of worker threads; at least one, not to exceed the number of partitions */
- threads = MIN(parts, vol_attach_threads);
+ threads = min(parts, vol_attach_threads);
/* create volume work queue */
queue_Init(&vq);
struct vinitvolumepackage_thread_param *params;
AFS_SIGSET_DECL;
- params = (struct vinitvolumepackage_thread_param *)malloc(sizeof(struct vinitvolumepackage_thread_param));
+ params = malloc(sizeof(struct vinitvolumepackage_thread_param));
osi_Assert(params);
params->pq = &pq;
params->vq = &vq;
osi_Assert(pq);
osi_Assert(vq);
- vb = (struct volume_init_batch*)malloc(sizeof(struct volume_init_batch));
+ vb = malloc(sizeof(struct volume_init_batch));
osi_Assert(vb);
vb->thread = params->thread;
vb->last = 0;
continue;
}
while ((vid = VInitNextVolumeId(dirp))) {
- Volume *vp = (Volume*)malloc(sizeof(Volume));
+ Volume *vp = calloc(1, sizeof(Volume));
osi_Assert(vp);
- memset(vp, 0, sizeof(Volume));
vp->device = partition->device;
vp->partition = partition;
vp->hashid = vid;
CV_BROADCAST(&vq->cv);
MUTEX_EXIT(&vq->mutex);
- vb = (struct volume_init_batch*)malloc(sizeof(struct volume_init_batch));
+ vb = malloc(sizeof(struct volume_init_batch));
osi_Assert(vb);
vb->thread = params->thread;
vb->size = 0;
/* build up the pass 0 shutdown work queue */
- dpq = (struct diskpartition_queue_t *) malloc(sizeof(struct diskpartition_queue_t));
+ dpq = malloc(sizeof(struct diskpartition_queue_t));
osi_Assert(dpq != NULL);
dpq->diskP = diskP;
queue_Prepend(¶ms, dpq);
/* Header I/O routines */
/***************************************************/
+static const char *
+HeaderName(bit32 magic)
+{
+ switch (magic) {
+ case VOLUMEINFOMAGIC:
+ return "volume info";
+ case SMALLINDEXMAGIC:
+ return "small index";
+ case LARGEINDEXMAGIC:
+ return "large index";
+ case LINKTABLEMAGIC:
+ return "link table";
+ }
+ return "unknown";
+}
+
/* open a descriptor for the inode (h),
* read in an on-disk structure into buffer (to) of size (size),
* verify versionstamp in structure has magic (magic) and
{
struct versionStamp *vsn;
FdHandle_t *fdP;
+ afs_sfsize_t nbytes;
+ afs_ino_str_t stmp;
*ec = 0;
if (h == NULL) {
+ Log("ReadHeader: Null inode handle argument for %s header file.\n",
+ HeaderName(magic));
*ec = VSALVAGE;
return;
}
fdP = IH_OPEN(h);
if (fdP == NULL) {
+ Log("ReadHeader: Failed to open %s header file "
+ "(volume=%u, inode=%s); errno=%d\n", HeaderName(magic), h->ih_vid,
+ PrintInode(stmp, h->ih_ino), errno);
*ec = VSALVAGE;
return;
}
vsn = (struct versionStamp *)to;
- if (FDH_PREAD(fdP, to, size, 0) != size || vsn->magic != magic) {
+ nbytes = FDH_PREAD(fdP, to, size, 0);
+ if (nbytes < 0) {
+ Log("ReadHeader: Failed to read %s header file "
+ "(volume=%u, inode=%s); errno=%d\n", HeaderName(magic), h->ih_vid,
+ PrintInode(stmp, h->ih_ino), errno);
+ *ec = VSALVAGE;
+ FDH_REALLYCLOSE(fdP);
+ return;
+ }
+ if (nbytes != size) {
+ Log("ReadHeader: Incorrect number of bytes read from %s header file "
+ "(volume=%u, inode=%s); expected=%d, read=%d\n",
+ HeaderName(magic), h->ih_vid, PrintInode(stmp, h->ih_ino), size,
+ (int)nbytes);
+ *ec = VSALVAGE;
+ FDH_REALLYCLOSE(fdP);
+ return;
+ }
+ if (vsn->magic != magic) {
+ Log("ReadHeader: Incorrect magic for %s header file "
+ "(volume=%u, inode=%s); expected=0x%x, read=0x%x\n",
+ HeaderName(magic), h->ih_vid, PrintInode(stmp, h->ih_ino), magic,
+ vsn->magic);
*ec = VSALVAGE;
FDH_REALLYCLOSE(fdP);
return;
}
+
FDH_CLOSE(fdP);
/* Check is conditional, in case caller wants to inspect version himself */
if (version && vsn->version != version) {
+ Log("ReadHeader: Incorrect version for %s header file "
+ "(volume=%u, inode=%s); expected=%x, read=%x\n",
+ HeaderName(magic), h->ih_vid, PrintInode(stmp, h->ih_ino),
+ version, vsn->version);
*ec = VSALVAGE;
}
}
* - volume is in an error state
* - volume is pre-attached
*/
- Log("VPreattachVolumeByVp_r: volume %u not in quiescent state\n", vid);
+ Log("VPreattachVolumeByVp_r: volume %u not in quiescent state (state %u flags 0x%x)\n",
+ vid, V_attachState(vp), V_attachFlags(vp));
goto done;
} else if (vp) {
/* we're re-attaching a volume; clear out some old state */
VOL_UNLOCK;
/* allocate the volume structure */
- vp = nvp = (Volume *) malloc(sizeof(Volume));
+ vp = nvp = calloc(1, sizeof(Volume));
osi_Assert(vp != NULL);
- memset(vp, 0, sizeof(Volume));
queue_Init(&vp->vnode_list);
queue_Init(&vp->rx_call_list);
CV_INIT(&V_attachCV(vp), "vp attach", CV_DEFAULT, 0);
}
if (*ec) {
+ VOL_LOCK;
+ FreeVolumeHeader(vp);
+ VOL_UNLOCK;
return;
}
if (retry) {
if (!*ec) {
struct IndexFileHeader iHead;
-#if OPENAFS_VOL_STATS
/*
* We just read in the diskstuff part of the header. If the detailed
* volume stats area has not yet been initialized, we should bzero the
memset((V_stat_area(vp)), 0, VOL_STATS_BYTES);
V_stat_initialized(vp) = 1;
}
-#endif /* OPENAFS_VOL_STATS */
(void)ReadHeader(ec, vp->vnodeIndex[vSmall].handle,
(char *)&iHead, sizeof(iHead),
goto locked_error;
} else if (*ec) {
/* volume operation in progress */
- goto unlocked_error;
+ VOL_LOCK;
+ /* we have already transitioned the vp away from ATTACHING state, so we
+ * can go right to the end of attach2, and we do not need to transition
+ * to ERROR. */
+ goto error_notbroken;
}
#else /* AFS_DEMAND_ATTACH_FS */
if (*ec) {
locked_error:
#ifdef AFS_DEMAND_ATTACH_FS
if (!VIsErrorState(V_attachState(vp))) {
+ if (VIsErrorState(error_state)) {
+ Log("attach2: forcing vol %u to error state (state %u flags 0x%x ec %d)\n",
+ vp->hashid, V_attachState(vp), V_attachFlags(vp), *ec);
+ }
VChangeState_r(vp, error_state);
}
#endif /* AFS_DEMAND_ATTACH_FS */
}
#ifdef AFS_DEMAND_ATTACH_FS
+ error_notbroken:
VCheckSalvage(vp);
if (forcefree) {
FreeVolume(vp);
* - VOL_STATE_SHUTTING_DOWN
*/
if ((V_attachState(vp) == VOL_STATE_ERROR) ||
- (V_attachState(vp) == VOL_STATE_SHUTTING_DOWN) ||
- (V_attachState(vp) == VOL_STATE_GOING_OFFLINE)) {
+ (V_attachState(vp) == VOL_STATE_SHUTTING_DOWN)) {
*ec = VNOVOL;
vp = NULL;
break;
}
/*
- * short circuit with VOFFLINE for VOL_STATE_UNATTACHED and
+ * short circuit with VOFFLINE for VOL_STATE_UNATTACHED/GOING_OFFLINE and
* VNOVOL for VOL_STATE_DELETED
*/
if ((V_attachState(vp) == VOL_STATE_UNATTACHED) ||
+ (V_attachState(vp) == VOL_STATE_GOING_OFFLINE) ||
(V_attachState(vp) == VOL_STATE_DELETED)) {
if (vp->specialStatus) {
*ec = vp->specialStatus;
case VSALVAGING:
break;
case VOFFLINE:
- if (!vp->pending_vol_op) {
- endloop = 1;
+ endloop = 1;
+ if (vp->specialStatus) {
+ *ec = vp->specialStatus;
}
break;
+
default:
- *ec = VNOVOL;
+ if (vp->specialStatus) {
+ *ec = vp->specialStatus;
+ } else {
+ *ec = VNOVOL;
+ }
endloop = 1;
}
if (endloop) {
vp = NULL;
break;
}
-#endif
-#ifdef AFS_DEMAND_ATTACH_FS
+ if (VIsErrorState(V_attachState(vp))) {
+ /* make sure we don't take a vp in VOL_STATE_ERROR state and use
+ * it, or transition it out of that state */
+ if (!*ec) {
+ *ec = VNOVOL;
+ }
+ vp = NULL;
+ break;
+ }
+
/*
- * this test MUST happen after VAttachVolymeByVp, so vol_op_state is
- * not VolOpRunningUnknown (attach2 would have converted it to Online
- * or Offline)
+ * this test MUST happen after VAttachVolymeByVp, so we have no
+ * conflicting vol op. (attach2 would have errored out if we had one;
+ * specifically attach_check_vop must have detected a conflicting vop)
*/
+ osi_Assert(!vp->pending_vol_op || vp->pending_vol_op->vol_op_state == FSSYNC_VolOpRunningOnline);
- /* only valid before/during demand attachment */
- osi_Assert(!vp->pending_vol_op || vp->pending_vol_op->vol_op_state != FSSYNC_VolOpRunningUnknown);
-
- /* deny getvolume due to running mutually exclusive vol op */
- if (vp->pending_vol_op && vp->pending_vol_op->vol_op_state==FSSYNC_VolOpRunningOffline) {
- /*
- * volume cannot remain online during this volume operation.
- * notify client.
- */
- if (vp->specialStatus) {
- /*
- * special status codes outrank normal VOFFLINE code
- */
- *ec = vp->specialStatus;
- if (client_ec) {
- *client_ec = vp->specialStatus;
- }
- } else {
- if (client_ec) {
- /* see CheckVnode() in afsfileprocs.c for an explanation
- * of this error code logic */
- afs_uint32 now = FT_ApproxTime();
- if ((vp->stats.last_vol_op + (10 * 60)) >= now) {
- *client_ec = VBUSY;
- } else {
- *client_ec = VRESTARTING;
- }
- }
- *ec = VOFFLINE;
- }
- VChangeState_r(vp, VOL_STATE_UNATTACHED);
- FreeVolumeHeader(vp);
- vp = NULL;
- break;
- }
#endif /* AFS_DEMAND_ATTACH_FS */
LoadVolumeHeader(ec, vp);
FSSYNC_VolOp_info * info;
/* attach a vol op info node to the volume struct */
- info = (FSSYNC_VolOp_info *) malloc(sizeof(FSSYNC_VolOp_info));
+ info = malloc(sizeof(FSSYNC_VolOp_info));
osi_Assert(info != NULL);
memcpy(info, vopinfo, sizeof(FSSYNC_VolOp_info));
vp->pending_vol_op = info;
Log("VScheduleSalvage_r: Salvage request for volume %lu "
"denied\n", afs_printable_uint32_lu(vp->hashid));
break;
+ case SYNC_FAILED:
+ Log("VScheduleSalvage_r: Salvage request for volume %lu "
+ "failed\n", afs_printable_uint32_lu(vp->hashid));
+ break;
default:
Log("VScheduleSalvage_r: Salvage request for volume %lu "
"received unknown protocol error %d\n",
/* volume bitmap routines */
/***************************************************/
+/*
+ * Grow the bitmap by the defined increment
+ */
+void
+VGrowBitmap(struct vnodeIndex *index)
+{
+ byte *bp;
+
+ bp = realloc(index->bitmap, index->bitmapSize + VOLUME_BITMAP_GROWSIZE);
+ osi_Assert(bp != NULL);
+ index->bitmap = bp;
+ bp += index->bitmapSize;
+ memset(bp, 0, VOLUME_BITMAP_GROWSIZE);
+ index->bitmapOffset = index->bitmapSize;
+ index->bitmapSize += VOLUME_BITMAP_GROWSIZE;
+
+ return;
+}
+
/**
* allocate a vnode bitmap number for the vnode
*
bp += sizeof(bit32) /* i.e. 4 */ ;
}
/* No bit map entry--must grow bitmap */
- bp = (byte *)
- realloc(index->bitmap, index->bitmapSize + VOLUME_BITMAP_GROWSIZE);
- osi_Assert(bp != NULL);
- index->bitmap = bp;
- bp += index->bitmapSize;
- memset(bp, 0, VOLUME_BITMAP_GROWSIZE);
- index->bitmapOffset = index->bitmapSize;
- index->bitmapSize += VOLUME_BITMAP_GROWSIZE;
+ VGrowBitmap(index);
+ bp = index->bitmap;
*bp = 1;
ret = index->bitmapOffset * 8;
#ifdef AFS_DEMAND_ATTACH_FS
osi_Assert(fdP != NULL);
file = FDH_FDOPEN(fdP, "r");
osi_Assert(file != NULL);
- vnode = (VnodeDiskObject *) malloc(vcp->diskSize);
+ vnode = malloc(vcp->diskSize);
osi_Assert(vnode != NULL);
size = OS_SIZE(fdP->fd_fd);
osi_Assert(size != -1);
vip->bitmap = BitMap;
vip->bitmapOffset = 0;
} else
- free((byte *) BitMap);
+ free(BitMap);
#endif /* BITMAP_LATER */
#ifdef AFS_DEMAND_ATTACH_FS
VChangeState_r(vp, state_save);
/* Volume Usage Statistics routines */
/***************************************************/
-#if OPENAFS_VOL_STATS
#define OneDay (86400) /* 24 hours' worth of seconds */
-#else
-#define OneDay (24*60*60) /* 24 hours */
-#endif /* OPENAFS_VOL_STATS */
static time_t
Midnight(time_t t) {
V_dayUse(vp) = 0;
V_dayUseDate(vp) = Midnight(now);
-#if OPENAFS_VOL_STATS
/*
* All we need to do is bzero the entire VOL_STATS_BYTES of
* the detailed volume statistics area.
*/
memset((V_stat_area(vp)), 0, VOL_STATS_BYTES);
-#endif /* OPENAFS_VOL_STATS */
- }
+ }
/*It's been more than a day of collection */
/*
if (now - V_dayUseDate(vp) > OneDay)
VAdjustVolumeStatistics_r(vp);
/*
- * Save the volume header image to disk after every 128 bumps to dayUse.
+ * Save the volume header image to disk after a threshold of bumps to dayUse,
+ * at most every usage_rate_limit seconds.
*/
- if ((V_dayUse(vp)++ & 127) == 0) {
+ V_dayUse(vp)++;
+ vp->usage_bumps_outstanding++;
+ if (vp->usage_bumps_outstanding >= vol_opts.usage_threshold
+ && vp->usage_bumps_next_write <= now) {
Error error;
+ vp->usage_bumps_outstanding = 0;
+ vp->usage_bumps_next_write = now + vol_opts.usage_rate_limit;
VUpdateVolume_r(&error, vp, VOL_UPDATE_WAIT);
}
}
return;
if (UpdateList == NULL) {
updateSize = UPDATE_LIST_SIZE;
- UpdateList = (VolumeId *) malloc(sizeof(VolumeId) * updateSize);
+ UpdateList = malloc(sizeof(VolumeId) * updateSize);
} else {
if (nUpdatedVolumes == updateSize) {
updateSize <<= 1;
Log("warning: there is likely a bug in the volume update scanner\n");
return;
}
- UpdateList =
- (VolumeId *) realloc(UpdateList,
- sizeof(VolumeId) * updateSize);
+ UpdateList = realloc(UpdateList,
+ sizeof(VolumeId) * updateSize);
}
}
osi_Assert(UpdateList != NULL);
/* no big deal if this allocation fails */
if (volume_LRU.q[idx].len) {
- salv_flag_vec = (Volume **) malloc(volume_LRU.q[idx].len * sizeof(Volume *));
+ salv_flag_vec = malloc(volume_LRU.q[idx].len * sizeof(Volume *));
}
now = FT_ApproxTime();
* could VWaitExclusiveState_r instead, but not waiting is faster and
* easier to do */
for (queue_Scan(&volume_hdr_LRU, qh, nqh, volHeader)) {
- if (!hd->back || !VIsExclusiveState(V_attachState(hd->back))) {
+ if (!qh->back || !VIsExclusiveState(V_attachState(qh->back))) {
queue_Remove(qh);
hd = qh;
break;
if (programType != fileServer) {
/* for volume utilities, we allocate volHeaders as needed */
if (!vp->header) {
- hd = (struct volHeader *)calloc(1, sizeof(*vp->header));
+ hd = calloc(1, sizeof(*vp->header));
osi_Assert(hd != NULL);
vp->header = hd;
hd->back = vp;
if (!hd) {
/* LRU is empty, so allocate a new volHeader
* this is probably indicative of a leak, so let the user know */
- hd = (struct volHeader *)calloc(1, sizeof(struct volHeader));
+ hd = calloc(1, sizeof(struct volHeader));
osi_Assert(hd != NULL);
if (!everLogged) {
Log("****Allocated more volume headers, probably leak****\n");
/* search the chain for this volume id */
for(queue_Scan(head, vp, np, Volume)) {
looks++;
- if ((vp->hashid == volumeId)) {
+ if (vp->hashid == volumeId) {
break;
}
}