/* Forward declarations */
static Volume *attach2(Error * ec, VolId vid, char *path,
register struct VolumeHeader *header,
- struct DiskPartition *partp, Volume * vp,
+ struct DiskPartition64 *partp, Volume * vp,
int isbusy, int mode);
static void ReallyFreeVolume(Volume * vp);
#ifdef AFS_DEMAND_ATTACH_FS
static int VHold(Volume * vp);
static int VHold_r(Volume * vp);
static void VGetBitmap_r(Error * ec, Volume * vp, VnodeClass class);
-static void GetVolumePath(Error * ec, VolId volumeId, char **partitionp,
- char **namep);
static void VReleaseVolumeHandles_r(Volume * vp);
static void VCloseVolumeHandles_r(Volume * vp);
static void LoadVolumeHeader(Error * ec, Volume * vp);
#ifdef AFS_PTHREAD_ENV
typedef struct diskpartition_queue_t {
struct rx_queue queue;
- struct DiskPartition * diskP;
+ struct DiskPartition64 * diskP;
} diskpartition_queue_t;
typedef struct vinitvolumepackage_thread_t {
struct rx_queue queue;
static void * VInitVolumePackageThread(void * args);
#endif /* AFS_PTHREAD_ENV */
-static int VAttachVolumesByPartition(struct DiskPartition *diskP,
+static int VAttachVolumesByPartition(struct DiskPartition64 *diskP,
int * nAttached, int * nUnattached);
/* VByP List */
static void AddVolumeToVByPList_r(Volume * vp);
static void DeleteVolumeFromVByPList_r(Volume * vp);
-static void VVByPListBeginExclusive_r(struct DiskPartition * dp);
-static void VVByPListEndExclusive_r(struct DiskPartition * dp);
-static void VVByPListWait_r(struct DiskPartition * dp);
+static void VVByPListBeginExclusive_r(struct DiskPartition64 * dp);
+static void VVByPListEndExclusive_r(struct DiskPartition64 * dp);
+static void VVByPListWait_r(struct DiskPartition64 * dp);
/* online salvager */
static int VCheckSalvage(register Volume * vp);
static void VHashWait_r(VolumeHashChainHead * head);
/* shutdown */
-static int ShutdownVByPForPass_r(struct DiskPartition * dp, int pass);
-static int ShutdownVolumeWalk_r(struct DiskPartition * dp, int pass,
+static int ShutdownVByPForPass_r(struct DiskPartition64 * dp, int pass);
+static int ShutdownVolumeWalk_r(struct DiskPartition64 * dp, int pass,
struct rx_queue ** idx);
static void ShutdownController(vshutdown_thread_t * params);
static void ShutdownCreateSchedule(vshutdown_thread_t * params);
static int VCheckSoftDetach(volatile Volume * vp, afs_uint32 thresh);
static int VCheckSoftDetachCandidate(volatile Volume * vp, afs_uint32 thresh);
static int VSoftDetachVolume_r(volatile Volume * vp, afs_uint32 thresh);
+
+
+/* Key for per-thread volume package options (VThreadOptions_t); created
+ * via pthread_key_create() during package initialization (no destructor). */
+pthread_key_t VThread_key;
+/* Options used when a thread has installed no thread-specific value
+ * (pthread_getspecific(VThread_key) == NULL): disallow_salvsync is zero,
+ * i.e. SALVSYNC scheduling is permitted on this thread. */
+VThreadOptions_t VThread_defaults = {
+ 0 /**< allow salvsync */
+};
#endif /* AFS_DEMAND_ATTACH_FS */
} else {
VLRU_SetOptions(VLRU_SET_ENABLED, 0);
}
+ assert(pthread_key_create(&VThread_key, NULL) == 0);
#endif
#ifdef AFS_PTHREAD_ENV
return -1;
if (programType == fileServer) {
- struct DiskPartition *diskP;
+ struct DiskPartition64 *diskP;
#ifdef AFS_PTHREAD_ENV
struct vinitvolumepackage_thread_t params;
struct diskpartition_queue_t * dpq;
#ifdef FSSYNC_BUILD_CLIENT
if (programType == volumeUtility && connect) {
if (!VConnectFS()) {
- Log("Unable to connect to file server; aborted\n");
- exit(1);
+ Log("Unable to connect to file server; will retry at need\n");
+ /*exit(1);*/
}
}
#ifdef AFS_DEMAND_ATTACH_FS
DIR *dirp;
struct dirent *dp;
- struct DiskPartition *diskP;
+ struct DiskPartition64 *diskP;
struct vinitvolumepackage_thread_t * params;
struct diskpartition_queue_t * dpq;
* attach all volumes on a given disk partition
*/
static int
-VAttachVolumesByPartition(struct DiskPartition *diskP, int * nAttached, int * nUnattached)
+VAttachVolumesByPartition(struct DiskPartition64 *diskP, int * nAttached, int * nUnattached)
{
DIR * dirp;
struct dirent * dp;
register Volume *vp, *np;
register afs_int32 code;
#ifdef AFS_DEMAND_ATTACH_FS
- struct DiskPartition * diskP;
+ struct DiskPartition64 * diskP;
struct diskpartition_queue_t * dpq;
vshutdown_thread_t params;
pthread_t tid;
dpq->diskP = diskP;
queue_Prepend(¶ms, dpq);
- params.part_pass_head[diskP->device] = queue_First(&diskP->vol_list, rx_queue);
+ params.part_pass_head[diskP->index] = queue_First(&diskP->vol_list, rx_queue);
}
Log("VShutdown: beginning parallel fileserver shutdown\n");
VVByPListEndExclusive_r(diskP);
Log("VShutdown: %s stats : (pass[0]=%d, pass[1]=%d, pass[2]=%d, pass[3]=%d)\n",
VPartitionPath(diskP),
- params.stats[0][diskP->device],
- params.stats[1][diskP->device],
- params.stats[2][diskP->device],
- params.stats[3][diskP->device]);
+ params.stats[0][diskP->index],
+ params.stats[1][diskP->index],
+ params.stats[2][diskP->index],
+ params.stats[3][diskP->index]);
}
Log("VShutdown: shutdown finished using %d threads\n", params.n_threads);
ShutdownController(vshutdown_thread_t * params)
{
/* XXX debug */
- struct DiskPartition * diskP;
+ struct DiskPartition64 * diskP;
Device id;
vshutdown_thread_t shadow;
Log("ShutdownController: n_threads_complete=%d, n_parts_done_pass=%d\n",
shadow.n_threads_complete, shadow.n_parts_done_pass);
for (diskP = DiskPartitionList; diskP; diskP=diskP->next) {
- id = diskP->device;
+ id = diskP->index;
Log("ShutdownController: part[%d] : (len=%d, thread_target=%d, done_pass=%d, pass_head=%p)\n",
id,
diskP->vol_list.len,
static void
ShutdownCreateSchedule(vshutdown_thread_t * params)
{
- struct DiskPartition * diskP;
+ struct DiskPartition64 * diskP;
int sum, thr_workload, thr_left;
int part_residue[VOLMAXPARTS+1];
Device id;
/* for fairness, give every partition with volumes remaining
* at least one thread */
for (diskP = DiskPartitionList; diskP && thr_left; diskP = diskP->next) {
- id = diskP->device;
+ id = diskP->index;
if (diskP->vol_list.len) {
params->part_thread_target[id] = 1;
thr_left--;
int delta;
for (diskP = DiskPartitionList; diskP && thr_left; diskP = diskP->next) {
- id = diskP->device;
+ id = diskP->index;
delta = (diskP->vol_list.len / thr_workload) -
params->part_thread_target[id];
if (delta < 0) {
/* compute the residues */
for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
- id = diskP->device;
+ id = diskP->index;
part_residue[id] = diskP->vol_list.len -
(params->part_thread_target[id] * thr_workload);
}
while (thr_left) {
max_residue = 0;
for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
- id = diskP->device;
+ id = diskP->index;
if (part_residue[id] > max_residue) {
max_residue = part_residue[id];
max_id = id;
if (thr_left >= params->n_parts) {
alloc = thr_left / params->n_parts;
for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
- id = diskP->device;
+ id = diskP->index;
params->part_thread_target[id] += alloc;
thr_left -= alloc;
}
/* finish off the last of the threads */
for (diskP = DiskPartitionList; thr_left && diskP; diskP = diskP->next) {
- id = diskP->device;
+ id = diskP->index;
params->part_thread_target[id]++;
thr_left--;
}
Volume * vp;
vshutdown_thread_t * params;
int part, code, found, pass, schedule_version_save, count;
- struct DiskPartition *diskP;
+ struct DiskPartition64 *diskP;
struct diskpartition_queue_t * dpq;
Device id;
assert(pthread_mutex_unlock(¶ms->lock) == 0);
diskP = dpq->diskP;
free(dpq);
- id = diskP->device;
+ id = diskP->index;
count = 0;
while (ShutdownVolumeWalk_r(diskP, 0, ¶ms->part_pass_head[id]))
count++;
- params->stats[0][diskP->device] = count;
+ params->stats[0][diskP->index] = count;
assert(pthread_mutex_lock(¶ms->lock) == 0);
}
found = 0;
/* find a disk partition to work on */
for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
- id = diskP->device;
+ id = diskP->index;
if (params->part_thread_target[id] && !params->part_done_pass[id]) {
params->part_thread_target[id]--;
found = 1;
/* hmm. for some reason the controller thread couldn't find anything for
* us to do. let's see if there's anything we can do */
for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
- id = diskP->device;
+ id = diskP->index;
if (diskP->vol_list.len && !params->part_done_pass[id]) {
found = 1;
break;
params->n_parts_done_pass = 0;
params->pass++;
for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
- id = diskP->device;
+ id = diskP->index;
params->part_done_pass[id] = 0;
params->part_pass_head[id] = queue_First(&diskP->vol_list, rx_queue);
}
* note that this function will not allow mp-fast
* shutdown of a partition */
int
-VShutdownByPartition_r(struct DiskPartition * dp)
+VShutdownByPartition_r(struct DiskPartition64 * dp)
{
int pass, retVal;
int pass_stats[4];
* traversal
*/
static int
-ShutdownVByPForPass_r(struct DiskPartition * dp, int pass)
+ShutdownVByPForPass_r(struct DiskPartition64 * dp, int pass)
{
struct rx_queue * q = queue_First(&dp->vol_list, rx_queue);
register int i = 0;
* returns 1 if a volume was shutdown in this pass,
* 0 otherwise */
static int
-ShutdownVolumeWalk_r(struct DiskPartition * dp, int pass,
+ShutdownVolumeWalk_r(struct DiskPartition64 * dp, int pass,
struct rx_queue ** idx)
{
struct rx_queue *qp, *nqp;
VolId volumeId)
{
Volume *vp;
- struct DiskPartition *partp;
+ struct DiskPartition64 *partp;
*ec = 0;
*/
Volume *
VPreAttachVolumeByVp_r(Error * ec,
- struct DiskPartition * partp,
+ struct DiskPartition64 * partp,
Volume * vp,
VolId vid)
{
/* check to see if pre-attach already happened */
if (vp &&
(V_attachState(vp) != VOL_STATE_UNATTACHED) &&
- !VIsErrorState(V_attachState(vp)) &&
- ((V_attachState(vp) != VOL_STATE_PREATTACHED) ||
- vp->pending_vol_op == NULL)) {
+ (V_attachState(vp) != VOL_STATE_PREATTACHED) &&
+ !VIsErrorState(V_attachState(vp))) {
/*
* pre-attach is a no-op in all but the following cases:
*
* - volume is unattached
* - volume is in an error state
- * - volume is pre-attached with a pending volume operation
- * (e.g. vos move between two partitions on same server)
+ * - volume is pre-attached
*/
+ Log("VPreattachVolumeByVp_r: volume %u not in quiescent state\n", vid);
goto done;
} else if (vp) {
/* we're re-attaching a volume; clear out some old state */
/* link the volume with its associated vice partition */
vp->device = partp->device;
vp->partition = partp;
+
vp->hashid = vid;
+ vp->specialStatus = 0;
/* if we dropped the lock, reacquire the lock,
* check for pre-attach races, and then add
struct afs_stat status;
struct VolumeDiskHeader diskHeader;
struct VolumeHeader iheader;
- struct DiskPartition *partp;
+ struct DiskPartition64 *partp;
char path[64];
int isbusy = 0;
VolId volumeId;
VWaitExclusiveState_r(vp);
/* at this point state must be one of:
- * UNATTACHED,
- * ATTACHED,
- * SHUTTING_DOWN,
- * GOING_OFFLINE,
- * SALVAGING,
- * ERROR
+ * - UNATTACHED
+ * - ATTACHED
+ * - SHUTTING_DOWN
+ * - GOING_OFFLINE
+ * - SALVAGING
+ * - ERROR
*/
if (vp->specialStatus == VBUSY)
/* if it's already attached, see if we can return it */
if (V_attachState(vp) == VOL_STATE_ATTACHED) {
VGetVolumeByVp_r(ec, vp);
- if (V_inUse(vp)) {
+ if (V_inUse(vp) == fileServer) {
VCancelReservation_r(vp);
return vp;
}
#else /* AFS_DEMAND_ATTACH_FS */
vp = VGetVolume_r(ec, volumeId);
if (vp) {
- if (V_inUse(vp))
+ if (V_inUse(vp) == fileServer)
return vp;
if (vp->specialStatus == VBUSY)
isbusy = 1;
vp = attach2(ec, volumeId, path, &iheader, partp, vp, isbusy, mode);
if (programType == volumeUtility && vp) {
+ if ((mode == V_VOLUPD) || (VolumeWriteable(vp) && (mode == V_CLONE))) {
+ /* mark volume header as in use so that volser crashes lead to a
+ * salvage attempt */
+ VUpdateVolume_r(ec, vp, 0);
+ }
#ifdef AFS_DEMAND_ATTACH_FS
/* for dafs, we should tell the fileserver, except for V_PEEK
* where we know it is not necessary */
struct afs_stat status;
struct VolumeDiskHeader diskHeader;
struct VolumeHeader iheader;
- struct DiskPartition *partp;
+ struct DiskPartition64 *partp;
char path[64];
int isbusy = 0;
VolId volumeId;
/* if it's already attached, see if we can return it */
if (V_attachState(vp) == VOL_STATE_ATTACHED) {
VGetVolumeByVp_r(ec, vp);
- if (V_inUse(vp)) {
+ if (V_inUse(vp) == fileServer) {
return vp;
} else {
if (vp->specialStatus == VBUSY)
*/
private Volume *
attach2(Error * ec, VolId volumeId, char *path, register struct VolumeHeader * header,
- struct DiskPartition * partp, register Volume * vp, int isbusy, int mode)
+ struct DiskPartition64 * partp, register Volume * vp, int isbusy, int mode)
{
vp->specialStatus = (byte) (isbusy ? VBUSY : 0);
IH_INIT(vp->vnodeIndex[vLarge].handle, partp->device, header->parent,
res.payload.buf = &vp->header->diskstuff;
if (FSYNC_VolOp(volumeId,
- VPartitionPath(partp),
+ partp->name,
FSYNC_VOL_QUERY_HDR,
FSYNC_WHATEVER,
&res) == SYNC_OK) {
/* check for pending volume operations */
if (vp->pending_vol_op) {
/* see if the pending volume op requires exclusive access */
- if (!VVolOpLeaveOnline_r(vp, vp->pending_vol_op)) {
+ switch (vp->pending_vol_op->vol_op_state) {
+ case FSSYNC_VolOpPending:
+ /* this should never happen */
+ assert(vp->pending_vol_op->vol_op_state != FSSYNC_VolOpPending);
+ break;
+
+ case FSSYNC_VolOpRunningUnknown:
+ vp->pending_vol_op->vol_op_state =
+ (VVolOpLeaveOnline_r(vp, vp->pending_vol_op) ?
+ FSSYNC_VolOpRunningOnline : FSSYNC_VolOpRunningOffline);
+ /* fall through */
+
+ case FSSYNC_VolOpRunningOffline:
/* mark the volume down */
*ec = VOFFLINE;
VChangeState_r(vp, VOL_STATE_UNATTACHED);
if (vp->specialStatus)
vp->specialStatus = 0;
if (V_blessed(vp) && V_inService(vp) && !V_needsSalvaged(vp)) {
- V_inUse(vp) = 1;
+ V_inUse(vp) = fileServer;
V_offlineMessage(vp)[0] = '\0';
}
+ } else {
+ if ((mode != V_PEEK) && (mode != V_SECRETLY))
+ V_inUse(vp) = programType;
+ V_checkoutMode(vp) = mode;
}
AddVolumeToHashTable(vp, V_id(vp));
#ifdef AFS_DEMAND_ATTACH_FS
- AddVolumeToVByPList_r(vp);
- VLRU_Add_r(vp);
if ((programType != fileServer) ||
- V_inUse(vp)) {
+ (V_inUse(vp) == fileServer)) {
+ AddVolumeToVByPList_r(vp);
+ VLRU_Add_r(vp);
VChangeState_r(vp, VOL_STATE_ATTACHED);
} else {
VChangeState_r(vp, VOL_STATE_UNATTACHED);
VAttachVolume_r(Error * ec, VolumeId volumeId, int mode)
{
char *part, *name;
- GetVolumePath(ec, volumeId, &part, &name);
+ VGetVolumePath(ec, volumeId, &part, &name);
if (*ec) {
register Volume *vp;
Error error;
Volume *avp, * rvp = hint;
#endif
+ /*
+ * if VInit is zero, the volume package dynamic
+ * data structures have not been initialized yet,
+ * and we must immediately return an error
+ */
+ if (VInit == 0) {
+ vp = NULL;
+ *ec = VOFFLINE;
+ if (client_ec) {
+ *client_ec = VOFFLINE;
+ }
+ goto not_inited;
+ }
+
#ifdef AFS_DEMAND_ATTACH_FS
if (rvp) {
VCreateReservation_r(rvp);
/* short circuit with VNOVOL in the following circumstances:
*
- * VOL_STATE_ERROR
- * VOL_STATE_SHUTTING_DOWN
+ * - VOL_STATE_ERROR
+ * - VOL_STATE_SHUTTING_DOWN
*/
if ((V_attachState(vp) == VOL_STATE_ERROR) ||
- (V_attachState(vp) == VOL_STATE_SHUTTING_DOWN)) {
+ (V_attachState(vp) == VOL_STATE_SHUTTING_DOWN) ||
+ (V_attachState(vp) == VOL_STATE_GOING_OFFLINE)) {
*ec = VNOVOL;
vp = NULL;
break;
/*
* short circuit with VOFFLINE in the following circumstances:
*
- * VOL_STATE_UNATTACHED
+ * - VOL_STATE_UNATTACHED
*/
if (V_attachState(vp) == VOL_STATE_UNATTACHED) {
- *ec = VOFFLINE;
+ if (vp->specialStatus) {
+ *ec = vp->specialStatus;
+ } else {
+ *ec = VOFFLINE;
+ }
vp = NULL;
break;
}
/* allowable states:
- * UNATTACHED
- * PREATTACHED
- * ATTACHED
- * GOING_OFFLINE
- * SALVAGING
+ * - PREATTACHED
+ * - ATTACHED
+ * - SALVAGING
*/
if (vp->salvage.requested) {
/*
* this test MUST happen after the volume header is loaded
*/
- if (vp->pending_vol_op && !VVolOpLeaveOnline_r(vp, vp->pending_vol_op)) {
- if (client_ec) {
- /* see CheckVnode() in afsfileprocs.c for an explanation
- * of this error code logic */
- afs_uint32 now = FT_ApproxTime();
- if ((vp->stats.last_vol_op + (10 * 60)) >= now) {
- *client_ec = VBUSY;
- } else {
- *client_ec = VRESTARTING;
- }
- }
- *ec = VOFFLINE;
- ReleaseVolumeHeader(vp->header);
- vp = NULL;
- break;
+
+ /* only valid before/during demand attachment */
+ /* NOTE(review): the assert below compares the pending_vol_op POINTER
+ * against the enum constant FSSYNC_VolOpRunningUnknown; it almost
+ * certainly should test vp->pending_vol_op->vol_op_state instead —
+ * confirm against upstream before merging. */
+ assert(!vp->pending_vol_op || vp->pending_vol_op != FSSYNC_VolOpRunningUnknown);
+
+ /* deny getvolume due to running mutually exclusive vol op */
+ if (vp->pending_vol_op && vp->pending_vol_op->vol_op_state==FSSYNC_VolOpRunningOffline) {
+ /*
+ * volume cannot remain online during this volume operation.
+ * notify client.
+ */
+ if (vp->specialStatus) {
+ /*
+ * special status codes outrank normal VOFFLINE code
+ */
+ *ec = vp->specialStatus;
+ if (client_ec) {
+ *client_ec = vp->specialStatus;
+ }
+ } else {
+ if (client_ec) {
+ /* see CheckVnode() in afsfileprocs.c for an explanation
+ * of this error code logic */
+ afs_uint32 now = FT_ApproxTime();
+ if ((vp->stats.last_vol_op + (10 * 60)) >= now) {
+ *client_ec = VBUSY;
+ } else {
+ *client_ec = VRESTARTING;
+ }
+ }
+ *ec = VOFFLINE;
+ }
+ VChangeState_r(vp, VOL_STATE_UNATTACHED);
+ FreeVolumeHeader(vp);
+ vp = NULL;
+ break;
}
#endif /* AFS_DEMAND_ATTACH_FS */
}
#endif /* AFS_DEMAND_ATTACH_FS */
+ not_inited:
assert(vp || *ec);
return vp;
}
#endif /* AFS_DEMAND_ATTACH_FS */
}
+#ifdef AFS_DEMAND_ATTACH_FS
+/**
+ * Take a volume offline in order to perform a volume operation.
+ *
+ * @param[inout] ec address in which to store error code
+ * @param[in] vp volume object pointer
+ * @param[in] message volume offline status message
+ *
+ * @pre
+ * - VOL_LOCK is held
+ * - caller MUST hold a heavyweight ref on vp
+ *
+ * @post
+ * - volume is taken offline
+ * - if possible, volume operation is promoted to running state
+ * - on failure, *ec is set to nonzero
+ *
+ * @note Although this function does not return any value, it may
+ * still fail to promote our pending volume operation to
+ * a running state. Any caller MUST check the value of *ec,
+ * and MUST NOT blindly assume success.
+ *
+ * @warning if the caller does not hold a lightweight ref on vp,
+ * then it MUST NOT reference vp after this function
+ * returns to the caller.
+ *
+ * @internal volume package internal use only
+ */
+void
+VOfflineForVolOp_r(Error *ec, Volume *vp, char *message)
+{
+ /* there must be a pending volume operation, or there is nothing to do */
+ assert(vp->pending_vol_op);
+ /* volume is not in use; drop the caller's heavyweight ref and fail */
+ if (!V_inUse(vp)) {
+ VPutVolume_r(vp);
+ *ec = 1;
+ return;
+ }
+ /* install the offline message unless one is already set; strncpy does
+ * not guarantee NUL-termination, so the line after it forces one */
+ if (V_offlineMessage(vp)[0] == '\0')
+ strncpy(V_offlineMessage(vp), message, sizeof(V_offlineMessage(vp)));
+ V_offlineMessage(vp)[sizeof(V_offlineMessage(vp)) - 1] = '\0';
+
+ /* request the offline transition, then trade our heavyweight ref for a
+ * lightweight reservation so vp stays valid while we wait below */
+ vp->goingOffline = 1;
+ VChangeState_r(vp, VOL_STATE_GOING_OFFLINE);
+ VCreateReservation_r(vp);
+ VPutVolume_r(vp);
+
+ /* Wait for the volume to go offline */
+ while (!VIsOfflineState(V_attachState(vp))) {
+ /* do not give corrupted volumes to the volserver */
+ if (vp->salvage.requested && vp->pending_vol_op->com.programType != salvageServer) {
+ *ec = 1;
+ goto error;
+ }
+ /* sleeps on the state-change condition; VOL_LOCK is held around us */
+ VWaitStateChange_r(vp);
+ }
+ *ec = 0;
+ error:
+ /* drop the lightweight reservation taken above */
+ VCancelReservation_r(vp);
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
void
VOffline(Volume * vp, char *message)
{
VDetachVolume_r(Error * ec, Volume * vp)
{
VolumeId volume;
- struct DiskPartition *tpartp;
- int notifyServer, useDone = FSYNC_VOL_ON;
+ struct DiskPartition64 *tpartp;
+ int notifyServer = 0;
+ int useDone = FSYNC_VOL_ON;
*ec = 0; /* always "succeeds" */
if (programType == volumeUtility) {
DeleteVolumeFromVByPList_r(vp);
VLRU_Delete_r(vp);
VChangeState_r(vp, VOL_STATE_SHUTTING_DOWN);
+#else
+ if (programType != fileServer)
+ V_inUse(vp) = 0;
#endif /* AFS_DEMAND_ATTACH_FS */
VPutVolume_r(vp);
/* Will be detached sometime in the future--this is OK since volume is offline */
VCheckDetach(register Volume * vp)
{
int ret = 0;
+ Error ec = 0;
if (vp->nUsers || vp->nWaiters)
return ret;
if (vp->shuttingDown) {
ret = 1;
+ if ((programType != fileServer) &&
+ (V_inUse(vp) == programType) &&
+ ((V_checkoutMode(vp) == V_VOLUPD) ||
+ ((V_checkoutMode(vp) == V_CLONE) &&
+ (VolumeWriteable(vp))))) {
+ V_inUse(vp) = 0;
+ VUpdateVolume_r(&ec, vp, VOL_UPDATE_NOFORCEOFF);
+ if (ec) {
+ Log("VCheckDetach: volume header update for volume %u "
+ "failed with errno %d\n", vp->hashid, errno);
+ }
+ }
VReleaseVolumeHandles_r(vp);
VCheckSalvage(vp);
ReallyFreeVolume(vp);
VCheckDetach(register Volume * vp)
{
int ret = 0;
+ Error ec = 0;
if (vp->nUsers)
return ret;
if (vp->shuttingDown) {
ret = 1;
+ if ((programType != fileServer) &&
+ (V_inUse(vp) == programType) &&
+ ((V_checkoutMode(vp) == V_VOLUPD) ||
+ ((V_checkoutMode(vp) == V_CLONE) &&
+ (VolumeWriteable(vp))))) {
+ V_inUse(vp) = 0;
+ VUpdateVolume_r(&ec, vp, VOL_UPDATE_NOFORCEOFF);
+ if (ec) {
+ Log("VCheckDetach: volume header update for volume %u failed with errno %d\n",
+ vp->hashid, errno);
+ }
+ }
VReleaseVolumeHandles_r(vp);
ReallyFreeVolume(vp);
if (programType == fileServer) {
int
VVolOpLeaveOnline_r(Volume * vp, FSSYNC_VolOp_info * vopinfo)
{
- return (vopinfo->com.command == FSYNC_VOL_NEEDVOLUME &&
+ return (vopinfo->vol_op_state == FSSYNC_VolOpRunningOnline ||
+ (vopinfo->com.command == FSYNC_VOL_NEEDVOLUME &&
(vopinfo->com.reason == V_READONLY ||
(!VolumeWriteable(vp) &&
(vopinfo->com.reason == V_CLONE ||
- vopinfo->com.reason == V_DUMP))));
+ vopinfo->com.reason == V_DUMP)))));
}
/**
int
VVolOpSetVBusy_r(Volume * vp, FSSYNC_VolOp_info * vopinfo)
{
- return (vopinfo->com.command == FSYNC_VOL_NEEDVOLUME &&
+ return ((vopinfo->com.command == FSYNC_VOL_OFF &&
+ vopinfo->com.reason == FSYNC_SALVAGE) ||
+ (vopinfo->com.command == FSYNC_VOL_NEEDVOLUME &&
(vopinfo->com.reason == V_CLONE ||
- vopinfo->com.reason == V_DUMP));
+ vopinfo->com.reason == V_DUMP)));
}
vp->salvage.reason = reason;
vp->stats.last_salvage = FT_ApproxTime();
if (flags & VOL_SALVAGE_INVALIDATE_HEADER) {
- /* XXX this should likely be changed to FreeVolumeHeader() */
- ReleaseVolumeHeader(vp->header);
+ /* Instead of ReleaseVolumeHeader, we do FreeVolumeHeader()
+ so that the next VAttachVolumeByVp_r() invocation
+ of attach2() will pull in a cached header
+ entry and fail, then load a fresh one from disk and attach
+ it to the volume.
+ */
+ FreeVolumeHeader(vp);
}
if (vp->stats.salvages < SALVAGE_COUNT_MAX) {
VChangeState_r(vp, VOL_STATE_SALVAGING);
int code, ret=0;
#ifdef SALVSYNC_BUILD_CLIENT
VolState state_save;
+ VThreadOptions_t * thread_opts;
char partName[16];
if (vp->nWaiters || vp->nUsers) {
if (vp->stats.salvages >= SALVAGE_COUNT_MAX)
return 1;
+ /*
+ * don't perform salvsync ops on certain threads
+ */
+ thread_opts = pthread_getspecific(VThread_key);
+ if (thread_opts == NULL) {
+ thread_opts = &VThread_defaults;
+ }
+ if (thread_opts->disallow_salvsync) {
+ return 1;
+ }
+
+ /*
+ * XXX the scheduling process should really be done asynchronously
+ * to avoid fssync deadlocks
+ */
if (!vp->salvage.scheduled) {
/* if we haven't previously scheduled a salvage, do so now
*
*/
strlcpy(partName, VPartitionPath(vp->partition), sizeof(partName));
state_save = VChangeState_r(vp, VOL_STATE_SALVSYNC_REQ);
- V_attachFlags(vp) |= VOL_IS_BUSY;
VOL_UNLOCK;
/* can't use V_id() since there's no guarantee
NULL);
VOL_LOCK;
VChangeState_r(vp, state_save);
- V_attachFlags(vp) &= ~(VOL_IS_BUSY);
if (code == SYNC_OK) {
vp->salvage.scheduled = 1;
*
* @pre VOL_LOCK is held.
*
- * @post salvageserver is sent a request to cancel the volume salvage
- *
- * @todo should set exclusive state and drop glock around salvsync call
+ * @post salvageserver is sent a request to cancel the volume salvage.
+ * volume is transitioned to a hard error state.
*
* @internal volume package internal use only.
*/
#ifdef SALVSYNC_BUILD_CLIENT
if (vp->salvage.scheduled) {
+ VChangeState_r(vp, VOL_STATE_SALVSYNC_REQ);
+ VOL_UNLOCK;
+
+ /* can't use V_id() since there's no guarantee
+ * we have the disk data header at this point */
code = SALVSYNC_SalvageVolume(vp->hashid,
VPartitionPath(vp->partition),
SALVSYNC_CANCEL,
reason,
0,
NULL);
+
+ VOL_LOCK;
+ VChangeState_r(vp, VOL_STATE_ERROR);
+
if (code == SYNC_OK) {
vp->salvage.scheduled = 0;
+ vp->salvage.requested = 0;
} else {
ret = 1;
}
* on a vice partition, it is possible for callers to get the wrong one,
* depending on the order of the disk partition linked list.
*
- * @internal volume package internal use only.
*/
-static void
-GetVolumePath(Error * ec, VolId volumeId, char **partitionp, char **namep)
+void
+VGetVolumePath(Error * ec, VolId volumeId, char **partitionp, char **namep)
{
static char partition[VMAXPATHLEN], name[VMAXPATHLEN];
char path[VMAXPATHLEN];
int found = 0;
- struct DiskPartition *dp;
+ struct DiskPartition64 *dp;
*ec = 0;
name[0] = '/';
min_delay = 0;
min_idx = i;
overdue = 1;
- break;
}
}
volume_hdr_LRU.stats.used = howMany;
volume_hdr_LRU.stats.attached = 0;
hp = (struct volHeader *)(calloc(howMany, sizeof(struct volHeader)));
+ assert(hp != NULL);
+
while (howMany--)
+ /* We are using ReleaseVolumeHeader to initialize the values on the header list
+ * to ensure they have the right values
+ */
ReleaseVolumeHeader(hp++);
}
#endif /* AFS_DEMAND_ATTACH_FS */
if (*ec) {
/* maintain (nUsers==0) => header in LRU invariant */
- ReleaseVolumeHeader(vp->header);
+ FreeVolumeHeader(vp);
}
}
*/
/* take exclusive control over the list */
static void
-VVByPListBeginExclusive_r(struct DiskPartition * dp)
+VVByPListBeginExclusive_r(struct DiskPartition64 * dp)
{
assert(dp->vol_list.busy == 0);
dp->vol_list.busy = 1;
* @internal volume package internal use only.
*/
static void
-VVByPListEndExclusive_r(struct DiskPartition * dp)
+VVByPListEndExclusive_r(struct DiskPartition64 * dp)
{
assert(dp->vol_list.busy);
dp->vol_list.busy = 0;
* @internal volume package internal use only.
*/
static void
-VVByPListWait_r(struct DiskPartition * dp)
+VVByPListWait_r(struct DiskPartition64 * dp)
{
while (dp->vol_list.busy) {
VOL_CV_WAIT(&dp->vol_list.cv);
return buf;
}
+/** one record per volume captured from a VLRU queue snapshot */
+struct VLRUExtStatsEntry {
+ VolumeId volid; /**< volume id (taken from vp->hashid) */
+};
+
+/** snapshot of VLRU contents, populated by VVLRUExtStats_r */
+struct VLRUExtStats {
+ afs_uint32 len; /**< allocated capacity of vec (entries) */
+ afs_uint32 used; /**< number of entries actually filled in vec */
+ struct {
+ afs_uint32 start; /**< index into vec where this queue begins */
+ afs_uint32 len; /**< number of vec entries for this queue */
+ } queue_info[VLRU_QUEUE_INVALID];
+ struct VLRUExtStatsEntry * vec; /**< caller frees; see VVLRUExtStats_r */
+};
+
+/**
+ * add a 256-entry fudge factor onto the vector in case state changes
+ * out from under us.
+ */
+#define VLRU_EXT_STATS_VEC_LEN_FUDGE 256
+
+/**
+ * collect extended statistics for the VLRU subsystem.
+ *
+ * @param[out] stats pointer to stats structure to be populated
+ * @param[in] nvols number of volumes currently known to exist
+ *
+ * @pre VOL_LOCK held
+ *
+ * @post stats->vec allocated and populated
+ *
+ * @return operation status
+ * @retval 0 success
+ * @retval 1 failure
+ */
+static int
+VVLRUExtStats_r(struct VLRUExtStats * stats, afs_uint32 nvols)
+{
+ afs_uint32 cur, idx, len;
+ struct rx_queue * qp, * nqp;
+ Volume * vp;
+ struct VLRUExtStatsEntry * vec;
+
+ /* over-allocate by a fudge factor since volumes may be created while
+ * we drop VOL_LOCK below */
+ len = nvols + VLRU_EXT_STATS_VEC_LEN_FUDGE;
+ vec = stats->vec = calloc(len,
+ sizeof(struct VLRUExtStatsEntry));
+ if (vec == NULL) {
+ return 1;
+ }
+
+ cur = 0;
+ for (idx = VLRU_QUEUE_NEW; idx < VLRU_QUEUE_INVALID; idx++) {
+ /* take exclusive control of this queue, then drop the global lock
+ * while we walk it; exclusivity keeps the list stable meanwhile */
+ VLRU_Wait_r(&volume_LRU.q[idx]);
+ VLRU_BeginExclusive_r(&volume_LRU.q[idx]);
+ VOL_UNLOCK;
+
+ stats->queue_info[idx].start = cur;
+
+ for (queue_Scan(&volume_LRU.q[idx], qp, nqp, rx_queue)) {
+ if (cur == len) {
+ /* out of space in vec */
+ break;
+ }
+ /* recover the Volume from its embedded vlru queue node */
+ vp = (Volume *)((char *)qp - offsetof(Volume, vlru));
+ vec[cur].volid = vp->hashid;
+ cur++;
+ }
+
+ /* once vec is full, later queues record len == 0 */
+ stats->queue_info[idx].len = cur - stats->queue_info[idx].start;
+
+ VOL_LOCK;
+ VLRU_EndExclusive_r(&volume_LRU.q[idx]);
+ }
+
+ stats->len = len;
+ stats->used = cur;
+ return 0;
+}
+
+/* stringify an enumerator name */
+#define ENUMTOSTRING(en) #en
+/* emit a switch case that returns the enumerator's name as a string */
+#define ENUMCASE(en) \
+ case en: \
+ return ENUMTOSTRING(en); \
+ break
+
+/**
+ * map a VLRU queue index to its symbolic name for log output.
+ *
+ * @param[in] idx VLRU queue index (VLRU_QUEUE_*)
+ *
+ * @return static string naming the queue, or "**UNKNOWN**" for
+ *         an out-of-range index; caller must not free or modify it.
+ */
+static char *
+vlru_idx_to_string(int idx)
+{
+ switch (idx) {
+ ENUMCASE(VLRU_QUEUE_NEW);
+ ENUMCASE(VLRU_QUEUE_MID);
+ ENUMCASE(VLRU_QUEUE_OLD);
+ ENUMCASE(VLRU_QUEUE_CANDIDATE);
+ ENUMCASE(VLRU_QUEUE_HELD);
+ ENUMCASE(VLRU_QUEUE_INVALID);
+ default:
+ return "**UNKNOWN**";
+ }
+}
+
void
VPrintExtendedCacheStats_r(int flags)
{
int i, j;
+ afs_uint32 vol_sum = 0;
struct stats {
double min;
double max;
char pr_buf[4][32];
VolumeHashChainHead *head;
Volume *vp, *np;
+ struct VLRUExtStats vlru_stats;
/* zero out stats */
memset(&looks, 0, sizeof(struct stats));
gets.sum += ch_gets.sum;
reorders.sum += ch_reorders.sum;
len.sum += (double)head->len;
+ vol_sum += head->len;
if (i == 0) {
len.min = (double) head->len;
/* print extended disk related statistics */
{
- struct DiskPartition * diskP;
+ struct DiskPartition64 * diskP;
afs_uint32 vol_count[VOLMAXPARTS+1];
byte part_exists[VOLMAXPARTS+1];
Device id;
VOL_UNLOCK;
for (i = 0; i <= VOLMAXPARTS; i++) {
if (part_exists[i]) {
+ /* XXX while this is currently safe, it is a violation
+ * of the VGetPartitionById_r interface contract. */
diskP = VGetPartitionById_r(i, 0);
if (diskP) {
Log("Partition %s has %d online volumes\n",
VOL_LOCK;
}
+ /* print extended VLRU statistics */
+ if (VVLRUExtStats_r(&vlru_stats, vol_sum) == 0) {
+ afs_uint32 idx, cur, lpos;
+ VOL_UNLOCK;
+ VolumeId line[5];
+
+ Log("VLRU State Dump:\n\n");
+
+ for (idx = VLRU_QUEUE_NEW; idx < VLRU_QUEUE_INVALID; idx++) {
+ Log("\t%s:\n", vlru_idx_to_string(idx));
+
+ lpos = 0;
+ for (cur = vlru_stats.queue_info[idx].start;
+ cur < vlru_stats.queue_info[idx].len;
+ cur++) {
+ line[lpos++] = vlru_stats.vec[cur].volid;
+ if (lpos==5) {
+ Log("\t\t%u, %u, %u, %u, %u,\n",
+ line[0], line[1], line[2], line[3], line[4]);
+ lpos = 0;
+ }
+ }
+
+ if (lpos) {
+ while (lpos < 5) {
+ line[lpos++] = 0;
+ }
+ Log("\t\t%u, %u, %u, %u, %u\n",
+ line[0], line[1], line[2], line[3], line[4]);
+ }
+ Log("\n");
+ }
+
+ free(vlru_stats.vec);
+
+ VOL_LOCK;
+ }
}
void