/*
* Copyright 2000, International Business Machines Corporation and others.
* All Rights Reserved.
- *
+ *
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
#include <afsconfig.h>
#include <afs/param.h>
+#include <roken.h>
+#include <afs/opr.h>
+
+#include <ctype.h>
+#include <stddef.h>
+
+#ifdef HAVE_SYS_FILE_H
+#include <sys/file.h>
+#endif
#include <rx/xdr.h>
#include <afs/afsint.h>
-#include <ctype.h>
-#include <signal.h>
+
#ifndef AFS_NT40_ENV
-#include <sys/param.h>
#if !defined(AFS_SGI_ENV)
#ifdef AFS_OSF_ENV
#include <ufs/fs.h>
#endif
#endif
#else /* AFS_VFSINCL_ENV */
-#if !defined(AFS_AIX_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
+#if !defined(AFS_AIX_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
#include <sys/fs.h>
#endif
#endif /* AFS_VFSINCL_ENV */
#endif /* AFS_OSF_ENV */
#endif /* AFS_SGI_ENV */
-#endif /* AFS_NT40_ENV */
-#include <errno.h>
-#include <sys/stat.h>
-#include <stdio.h>
-#ifdef AFS_NT40_ENV
-#include <fcntl.h>
-#else
-#include <sys/file.h>
-#endif
-#include <dirent.h>
+#endif /* !AFS_NT40_ENV */
+
#ifdef AFS_AIX_ENV
#include <sys/vfs.h>
-#include <fcntl.h>
#else
#ifdef AFS_HPUX_ENV
-#include <fcntl.h>
#include <mntent.h>
#else
#if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
#else
#ifndef AFS_NT40_ENV
#if defined(AFS_SGI_ENV)
-#include <fcntl.h>
#include <mntent.h>
-
#else
#ifndef AFS_LINUX20_ENV
#include <fstab.h> /* Need to find in libc 5, present in libc 6 */
#endif
#endif /* AFS_HPUX_ENV */
#endif
-#ifndef AFS_NT40_ENV
-#include <netdb.h>
-#include <netinet/in.h>
-#include <sys/wait.h>
-#include <setjmp.h>
-#ifndef ITIMER_REAL
-#include <sys/time.h>
-#endif /* ITIMER_REAL */
-#endif /* AFS_NT40_ENV */
-#if defined(AFS_SUN5_ENV) || defined(AFS_NT40_ENV) || defined(AFS_LINUX20_ENV)
-#include <string.h>
-#else
-#include <strings.h>
-#endif
#include "nfs.h"
#include <afs/errors.h>
#include <afs/afssyscalls.h>
#include "ihandle.h"
#include <afs/afsutil.h>
-#ifdef AFS_NT40_ENV
-#include <io.h>
-#endif
#include "daemon_com.h"
#include "fssync.h"
#include "salvsync.h"
#include "volume.h"
#include "partition.h"
#include "volume_inline.h"
-#ifdef AFS_PTHREAD_ENV
-#include <assert.h>
-#else /* AFS_PTHREAD_ENV */
-#include "afs/assert.h"
-#endif /* AFS_PTHREAD_ENV */
+#include "common.h"
#include "vutils.h"
-#ifndef AFS_NT40_ENV
#include <afs/dir.h>
-#include <unistd.h>
-#endif
-
-#if !defined(offsetof)
-#include <stddef.h>
-#endif
-
-#ifdef O_LARGEFILE
-#define afs_stat stat64
-#define afs_fstat fstat64
-#define afs_open open64
-#else /* !O_LARGEFILE */
-#define afs_stat stat
-#define afs_fstat fstat
-#define afs_open open
-#endif /* !O_LARGEFILE */
#ifdef AFS_PTHREAD_ENV
pthread_mutex_t vol_glock_mutex;
pthread_cond_t vol_put_volume_cond;
pthread_cond_t vol_sleep_cond;
pthread_cond_t vol_init_attach_cond;
+pthread_cond_t vol_vinit_cond;
int vol_attach_threads = 1;
#endif /* AFS_PTHREAD_ENV */
static volatile sig_atomic_t vol_disallow_salvsync = 0;
#endif /* AFS_DEMAND_ATTACH_FS */
+/**
+ * has VShutdown_r been called / is VShutdown_r running?
+ */
+static int vol_shutting_down = 0;
+
#ifdef AFS_OSF_ENV
extern void *calloc(), *realloc();
#endif
-/*@printflike@*/ extern void Log(const char *format, ...);
-
/* Forward declarations */
-static Volume *attach2(Error * ec, VolId vid, char *path,
- register struct VolumeHeader *header,
- struct DiskPartition64 *partp, Volume * vp,
- int isbusy, int mode);
+static Volume *attach2(Error * ec, VolId volumeId, char *path,
+ struct DiskPartition64 *partp, Volume * vp,
+ int isbusy, int mode, int *acheckedOut);
static void ReallyFreeVolume(Volume * vp);
#ifdef AFS_DEMAND_ATTACH_FS
static void FreeVolume(Volume * vp);
static void VScanUpdateList(void);
#endif /* !AFS_DEMAND_ATTACH_FS */
static void VInitVolumeHeaderCache(afs_uint32 howMany);
-static int GetVolumeHeader(register Volume * vp);
-static void ReleaseVolumeHeader(register struct volHeader *hd);
-static void FreeVolumeHeader(register Volume * vp);
-static void AddVolumeToHashTable(register Volume * vp, int hashid);
-static void DeleteVolumeFromHashTable(register Volume * vp);
+static int GetVolumeHeader(Volume * vp);
+static void ReleaseVolumeHeader(struct volHeader *hd);
+static void FreeVolumeHeader(Volume * vp);
+static void AddVolumeToHashTable(Volume * vp, int hashid);
+static void DeleteVolumeFromHashTable(Volume * vp);
#if 0
static int VHold(Volume * vp);
#endif
static void VReleaseVolumeHandles_r(Volume * vp);
static void VCloseVolumeHandles_r(Volume * vp);
static void LoadVolumeHeader(Error * ec, Volume * vp);
-static int VCheckOffline(register Volume * vp);
-static int VCheckDetach(register Volume * vp);
-static Volume * GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int flags);
-#ifdef AFS_DEMAND_ATTACH_FS
-static int VolumeExternalName_r(VolumeId volumeId, char * name, size_t len);
-#endif
+static int VCheckOffline(Volume * vp);
+static int VCheckDetach(Volume * vp);
+static Volume * GetVolume(Error * ec, Error * client_ec, VolId volumeId,
+ Volume * hint, const struct timespec *ts);
int LogLevel; /* Vice loglevel--not defined as extern so that it will be
* defined when not linked with vice, XXXX */
/*
* when possible, don't just reorder single elements, but reorder
* entire chains of elements at once. a chain of elements that
- * exceed the element previous to the pivot by at least CHAIN_THRESH
+ * exceed the element previous to the pivot by at least CHAIN_THRESH
* accesses are moved in front of the chain whose elements have at
* least CHAIN_THRESH less accesses than the pivot element
*/
#endif /* !AFS_HAVE_FFS */
#ifdef AFS_PTHREAD_ENV
+/**
+ * disk partition queue element
+ */
typedef struct diskpartition_queue_t {
- struct rx_queue queue;
- struct DiskPartition64 * diskP;
+ struct rx_queue queue; /**< queue header */
+ struct DiskPartition64 *diskP; /**< disk partition table entry */
} diskpartition_queue_t;
+
+#ifndef AFS_DEMAND_ATTACH_FS
+
typedef struct vinitvolumepackage_thread_t {
struct rx_queue queue;
pthread_cond_t thread_done_cv;
int n_threads_complete;
} vinitvolumepackage_thread_t;
static void * VInitVolumePackageThread(void * args);
+
+#else /* !AFS_DEMAND_ATTACH_FS */
+#define VINIT_BATCH_MAX_SIZE 512
+
+/**
+ * disk partition work queue
+ */
+struct partition_queue {
+ struct rx_queue head; /**< diskpartition_queue_t queue */
+ pthread_mutex_t mutex;
+ pthread_cond_t cv;
+};
+
+/**
+ * volumes parameters for preattach
+ */
+struct volume_init_batch {
+ struct rx_queue queue; /**< queue header */
+ int thread; /**< posting worker thread */
+ int last; /**< indicates thread is done */
+ int size; /**< number of volume ids in batch */
+ Volume *batch[VINIT_BATCH_MAX_SIZE]; /**< volumes ids to preattach */
+};
+
+/**
+ * volume parameters work queue
+ */
+struct volume_init_queue {
+ struct rx_queue head; /**< volume_init_batch queue */
+ pthread_mutex_t mutex;
+ pthread_cond_t cv;
+};
+
+/**
+ * volume init worker thread parameters
+ */
+struct vinitvolumepackage_thread_param {
+ int nthreads; /**< total number of worker threads */
+ int thread; /**< thread number for this worker thread */
+ struct partition_queue *pq; /**< queue partitions to scan */
+ struct volume_init_queue *vq; /**< queue of volume to preattach */
+};
+
+static void *VInitVolumePackageThread(void *args);
+static struct DiskPartition64 *VInitNextPartition(struct partition_queue *pq);
+static VolId VInitNextVolumeId(DIR *dirp);
+static int VInitPreAttachVolumes(int nthreads, struct volume_init_queue *vq);
+
+#endif /* !AFS_DEMAND_ATTACH_FS */
#endif /* AFS_PTHREAD_ENV */
-static int VAttachVolumesByPartition(struct DiskPartition64 *diskP,
+#ifndef AFS_DEMAND_ATTACH_FS
+static int VAttachVolumesByPartition(struct DiskPartition64 *diskP,
int * nAttached, int * nUnattached);
+#endif /* !AFS_DEMAND_ATTACH_FS */
#ifdef AFS_DEMAND_ATTACH_FS
static void VVByPListWait_r(struct DiskPartition64 * dp);
/* online salvager */
-static int VCheckSalvage(register Volume * vp);
-#ifdef SALVSYNC_BUILD_CLIENT
+static int VCheckSalvage(Volume * vp);
+#if defined(SALVSYNC_BUILD_CLIENT) || defined(FSSYNC_BUILD_CLIENT)
static int VScheduleSalvage_r(Volume * vp);
#endif
#endif /* AFS_DEMAND_ATTACH_FS */
-struct Lock vol_listLock; /* Lock obtained when listing volumes:
- * prevents a volume from being missed
- * if the volume is attached during a
+struct Lock vol_listLock; /* Lock obtained when listing volumes:
+ * prevents a volume from being missed
+ * if the volume is attached during a
* list volumes */
/***************************************************/
/* Startup routines */
/***************************************************/
+
+#if defined(FAST_RESTART) && defined(AFS_DEMAND_ATTACH_FS)
+# error FAST_RESTART and DAFS are incompatible. For the DAFS equivalent \
+ of FAST_RESTART, use the -unsafe-nosalvage fileserver argument
+#endif
+
/**
* assign default values to a VolumePackageOptions struct.
*
opts->canUseFSSYNC = 0;
opts->canUseSALVSYNC = 0;
+ opts->interrupt_rxcall = NULL;
+ opts->offline_timeout = -1;
+ opts->offline_shutdown_timeout = -1;
+
+#ifdef FAST_RESTART
+ opts->unsafe_attach = 1;
+#else /* !FAST_RESTART */
+ opts->unsafe_attach = 0;
+#endif /* !FAST_RESTART */
+
switch (pt) {
case fileServer:
opts->canScheduleSalvage = 1;
}
}
+/**
+ * Set VInit to a certain value, and signal waiters.
+ *
+ * @param[in] value the value to set VInit to
+ *
+ * @pre VOL_LOCK held
+ */
+static void
+VSetVInit_r(int value)
+{
+ /* publish the new init stage, then wake every thread blocked on
+ * vol_vinit_cond waiting for VInit to advance */
+ VInit = value;
+ CV_BROADCAST(&vol_vinit_cond);
+}
+
+/**
+ * Log the configured client-interrupt timeout for a class of volumes.
+ *
+ * @param[in] type     human-readable description of what goes offline
+ * @param[in] timeout  timeout in seconds; negative means the feature is
+ *                     disabled, so nothing is logged
+ */
+static_inline void
+VLogOfflineTimeout(const char *type, afs_int32 timeout)
+{
+ if (timeout < 0) {
+ return;
+ }
+ if (timeout == 0) {
+ Log("VInitVolumePackage: Interrupting clients accessing %s "
+ "immediately\n", type);
+ } else {
+ Log("VInitVolumePackage: Interrupting clients accessing %s "
+ "after %ld second%s\n", type, (long)timeout, timeout==1?"":"s");
+ }
+}
+
int
VInitVolumePackage2(ProgramType pt, VolumePackageOptions * opts)
{
programType = pt;
vol_opts = *opts;
+#ifndef AFS_PTHREAD_ENV
+ if (opts->offline_timeout != -1 || opts->offline_shutdown_timeout != -1) {
+ Log("VInitVolumePackage: offline_timeout and/or "
+ "offline_shutdown_timeout was specified, but the volume package "
+ "does not support these for LWP builds\n");
+ return -1;
+ }
+#endif
+ VLogOfflineTimeout("volumes going offline", opts->offline_timeout);
+ VLogOfflineTimeout("volumes going offline during shutdown",
+ opts->offline_shutdown_timeout);
+
memset(&VStats, 0, sizeof(VStats));
VStats.hdr_cache_size = 200;
} else {
VLRU_SetOptions(VLRU_SET_ENABLED, 0);
}
- assert(pthread_key_create(&VThread_key, NULL) == 0);
+ osi_Assert(pthread_key_create(&VThread_key, NULL) == 0);
#endif
-#ifdef AFS_PTHREAD_ENV
- assert(pthread_mutex_init(&vol_glock_mutex, NULL) == 0);
- assert(pthread_mutex_init(&vol_trans_mutex, NULL) == 0);
- assert(pthread_cond_init(&vol_put_volume_cond, NULL) == 0);
- assert(pthread_cond_init(&vol_sleep_cond, NULL) == 0);
- assert(pthread_cond_init(&vol_init_attach_cond, NULL) == 0);
-#else /* AFS_PTHREAD_ENV */
+ MUTEX_INIT(&vol_glock_mutex, "vol glock", MUTEX_DEFAULT, 0);
+ MUTEX_INIT(&vol_trans_mutex, "vol trans", MUTEX_DEFAULT, 0);
+ CV_INIT(&vol_put_volume_cond, "vol put", CV_DEFAULT, 0);
+ CV_INIT(&vol_sleep_cond, "vol sleep", CV_DEFAULT, 0);
+ CV_INIT(&vol_init_attach_cond, "vol init attach", CV_DEFAULT, 0);
+ CV_INIT(&vol_vinit_cond, "vol init", CV_DEFAULT, 0);
+#ifndef AFS_PTHREAD_ENV
IOMGR_Initialize();
#endif /* AFS_PTHREAD_ENV */
Lock_Init(&vol_listLock);
srandom(time(0)); /* For VGetVolumeInfo */
#ifdef AFS_DEMAND_ATTACH_FS
- assert(pthread_mutex_init(&vol_salvsync_mutex, NULL) == 0);
+ MUTEX_INIT(&vol_salvsync_mutex, "salvsync", MUTEX_DEFAULT, 0);
#endif /* AFS_DEMAND_ATTACH_FS */
- /* Ok, we have done enough initialization that fileserver can
- * start accepting calls, even though the volumes may not be
+ /* Ok, we have done enough initialization that fileserver can
+ * start accepting calls, even though the volumes may not be
* available just yet.
*/
VInit = 1;
#if defined(AFS_DEMAND_ATTACH_FS) && defined(SALVSYNC_BUILD_CLIENT)
if (VCanUseSALVSYNC()) {
/* establish a connection to the salvager at this point */
- assert(VConnectSALV() != 0);
+ osi_Assert(VConnectSALV() != 0);
}
#endif /* AFS_DEMAND_ATTACH_FS */
return 0;
}
+
+#if !defined(AFS_PTHREAD_ENV)
+/**
+ * Attach volumes in vice partitions
+ *
+ * @param[in] pt calling program type
+ *
+ * @return 0
+ * @note This is the original, non-threaded version of attach partitions.
+ *
+ * @post VInit state is 2
+ */
int
VInitAttachVolumes(ProgramType pt)
{
- assert(VInit==1);
+ osi_Assert(VInit==1);
+ if (pt == fileServer) {
+ struct DiskPartition64 *diskP;
+ /* Attach all the volumes in each vice partition, serially */
+ for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
+ int nAttached = 0, nUnattached = 0;
+ osi_Assert(VAttachVolumesByPartition(diskP, &nAttached, &nUnattached) == 0);
+ }
+ }
+ VOL_LOCK;
+ VSetVInit_r(2); /* Initialized, and all volumes have been attached */
+ /* wake LWP waiters blocked on the VInitAttachVolumes event */
+ LWP_NoYieldSignal(VInitAttachVolumes);
+ VOL_UNLOCK;
+ return 0;
+}
+#endif /* !AFS_PTHREAD_ENV */
+
+#if defined(AFS_PTHREAD_ENV) && !defined(AFS_DEMAND_ATTACH_FS)
+/**
+ * Attach volumes in vice partitions
+ *
+ * @param[in] pt calling program type
+ *
+ * @return 0
+ * @note Threaded version of attach partitions.
+ *
+ * @post VInit state is 2
+ */
+int
+VInitAttachVolumes(ProgramType pt)
+{
+ osi_Assert(VInit==1);
if (pt == fileServer) {
struct DiskPartition64 *diskP;
-#ifdef AFS_PTHREAD_ENV
struct vinitvolumepackage_thread_t params;
struct diskpartition_queue_t * dpq;
int i, threads, parts;
pthread_t tid;
pthread_attr_t attrs;
- assert(pthread_cond_init(¶ms.thread_done_cv,NULL) == 0);
+ CV_INIT(¶ms.thread_done_cv, "thread done", CV_DEFAULT, 0);
queue_Init(¶ms);
params.n_threads_complete = 0;
/* create partition work queue */
for (parts=0, diskP = DiskPartitionList; diskP; diskP = diskP->next, parts++) {
dpq = (diskpartition_queue_t *) malloc(sizeof(struct diskpartition_queue_t));
- assert(dpq != NULL);
+ osi_Assert(dpq != NULL);
dpq->diskP = diskP;
queue_Append(¶ms,dpq);
}
if (threads > 1) {
/* spawn off a bunch of initialization threads */
- assert(pthread_attr_init(&attrs) == 0);
- assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
+ osi_Assert(pthread_attr_init(&attrs) == 0);
+ osi_Assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
Log("VInitVolumePackage: beginning parallel fileserver startup\n");
-#ifdef AFS_DEMAND_ATTACH_FS
- Log("VInitVolumePackage: using %d threads to pre-attach volumes on %d partitions\n",
- threads, parts);
-#else /* AFS_DEMAND_ATTACH_FS */
Log("VInitVolumePackage: using %d threads to attach volumes on %d partitions\n",
threads, parts);
-#endif /* AFS_DEMAND_ATTACH_FS */
VOL_LOCK;
for (i=0; i < threads; i++) {
AFS_SIGSET_DECL;
AFS_SIGSET_CLEAR();
- assert(pthread_create
+ osi_Assert(pthread_create
(&tid, &attrs, &VInitVolumePackageThread,
¶ms) == 0);
AFS_SIGSET_RESTORE();
}
VOL_UNLOCK;
- assert(pthread_attr_destroy(&attrs) == 0);
+ osi_Assert(pthread_attr_destroy(&attrs) == 0);
} else {
/* if we're only going to run one init thread, don't bother creating
* another LWP */
Log("VInitVolumePackage: beginning single-threaded fileserver startup\n");
-#ifdef AFS_DEMAND_ATTACH_FS
- Log("VInitVolumePackage: using 1 thread to pre-attach volumes on %d partition(s)\n",
- parts);
-#else /* AFS_DEMAND_ATTACH_FS */
Log("VInitVolumePackage: using 1 thread to attach volumes on %d partition(s)\n",
parts);
-#endif /* AFS_DEMAND_ATTACH_FS */
VInitVolumePackageThread(¶ms);
}
- assert(pthread_cond_destroy(¶ms.thread_done_cv) == 0);
-
-#else /* AFS_PTHREAD_ENV */
-
- /* Attach all the volumes in this partition */
- for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
- int nAttached = 0, nUnattached = 0;
- assert(VAttachVolumesByPartition(diskP, &nAttached, &nUnattached) == 0);
- }
-#endif /* AFS_PTHREAD_ENV */
+ CV_DESTROY(¶ms.thread_done_cv);
}
VOL_LOCK;
- VInit = 2; /* Initialized, and all volumes have been attached */
-#ifdef AFS_PTHREAD_ENV
- assert(pthread_cond_broadcast(&vol_init_attach_cond) == 0);
-#else
- LWP_NoYieldSignal(VInitAttachVolumes);
-#endif /* AFS_PTHREAD_ENV */
+ VSetVInit_r(2); /* Initialized, and all volumes have been attached */
+ CV_BROADCAST(&vol_init_attach_cond);
VOL_UNLOCK;
return 0;
}
-#ifdef AFS_PTHREAD_ENV
static void *
VInitVolumePackageThread(void * args) {
diskP = dpq->diskP;
free(dpq);
- assert(VAttachVolumesByPartition(diskP, &nAttached, &nUnattached) == 0);
+ osi_Assert(VAttachVolumesByPartition(diskP, &nAttached, &nUnattached) == 0);
VOL_LOCK;
}
done:
params->n_threads_complete++;
- pthread_cond_signal(¶ms->thread_done_cv);
+ CV_SIGNAL(¶ms->thread_done_cv);
VOL_UNLOCK;
return NULL;
}
-#endif /* AFS_PTHREAD_ENV */
+#endif /* AFS_PTHREAD_ENV && !AFS_DEMAND_ATTACH_FS */
+
+#if defined(AFS_DEMAND_ATTACH_FS)
+/**
+ * Attach volumes in vice partitions
+ *
+ * @param[in] pt calling program type
+ *
+ * @return 0
+ * @note Threaded version of attach partitions.
+ *
+ * @post VInit state is 2
+ */
+int
+VInitAttachVolumes(ProgramType pt)
+{
+ osi_Assert(VInit==1);
+ if (pt == fileServer) {
+
+ struct DiskPartition64 *diskP;
+ struct partition_queue pq;
+ struct volume_init_queue vq;
+
+ int i, threads, parts;
+ pthread_t tid;
+ pthread_attr_t attrs;
+
+ /* create partition work queue */
+ queue_Init(&pq);
+ CV_INIT(&(pq.cv), "partq", CV_DEFAULT, 0);
+ MUTEX_INIT(&(pq.mutex), "partq", MUTEX_DEFAULT, 0);
+ for (parts = 0, diskP = DiskPartitionList; diskP; diskP = diskP->next, parts++) {
+ struct diskpartition_queue_t *dp;
+ dp = (struct diskpartition_queue_t*)malloc(sizeof(struct diskpartition_queue_t));
+ osi_Assert(dp != NULL);
+ dp->diskP = diskP;
+ queue_Append(&pq, dp);
+ }
+
+ /* number of worker threads; at least one, not to exceed the number of partitions */
+ threads = MIN(parts, vol_attach_threads);
+
+ /* create volume work queue */
+ queue_Init(&vq);
+ CV_INIT(&(vq.cv), "volq", CV_DEFAULT, 0);
+ MUTEX_INIT(&(vq.mutex), "volq", MUTEX_DEFAULT, 0);
+
+ osi_Assert(pthread_attr_init(&attrs) == 0);
+ osi_Assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
+
+ Log("VInitVolumePackage: beginning parallel fileserver startup\n");
+ Log("VInitVolumePackage: using %d threads to pre-attach volumes on %d partitions\n",
+ threads, parts);
+
+ /* create threads to scan disk partitions. */
+ for (i=0; i < threads; i++) {
+ struct vinitvolumepackage_thread_param *params;
+ AFS_SIGSET_DECL;
+
+ /* params is heap-allocated per thread; the worker frees it
+ * when it exits (see VInitVolumePackageThread) */
+ params = (struct vinitvolumepackage_thread_param *)malloc(sizeof(struct vinitvolumepackage_thread_param));
+ osi_Assert(params);
+ params->pq = &pq;
+ params->vq = &vq;
+ params->nthreads = threads;
+ params->thread = i+1;
+
+ AFS_SIGSET_CLEAR();
+ osi_Assert(pthread_create (&tid, &attrs, &VInitVolumePackageThread, (void*)params) == 0);
+ AFS_SIGSET_RESTORE();
+ }
+
+ /* consume volume batches in this thread until every scanner has
+ * posted its final (last) batch */
+ VInitPreAttachVolumes(threads, &vq);
+
+ osi_Assert(pthread_attr_destroy(&attrs) == 0);
+ CV_DESTROY(&pq.cv);
+ MUTEX_DESTROY(&pq.mutex);
+ CV_DESTROY(&vq.cv);
+ MUTEX_DESTROY(&vq.mutex);
+ }
+
+ VOL_LOCK;
+ VSetVInit_r(2); /* Initialized, and all volumes have been attached */
+ CV_BROADCAST(&vol_init_attach_cond);
+ VOL_UNLOCK;
+
+ return 0;
+}
+
+/**
+ * Volume package initialization worker thread. Scan partitions for volume
+ * header files. Gather batches of volume ids and dispatch them to
+ * the main thread to be preattached. The volume preattachment is done
+ * in the main thread to avoid global volume lock contention.
+ */
+static void *
+VInitVolumePackageThread(void *args)
+{
+ struct vinitvolumepackage_thread_param *params;
+ struct DiskPartition64 *partition;
+ struct partition_queue *pq;
+ struct volume_init_queue *vq;
+ struct volume_init_batch *vb;
+
+ osi_Assert(args);
+ params = (struct vinitvolumepackage_thread_param *)args;
+ pq = params->pq;
+ vq = params->vq;
+ osi_Assert(pq);
+ osi_Assert(vq);
+
+ /* batches are heap-allocated; ownership passes to the consumer
+ * (VInitPreAttachVolumes), which frees them */
+ vb = (struct volume_init_batch*)malloc(sizeof(struct volume_init_batch));
+ osi_Assert(vb);
+ vb->thread = params->thread;
+ vb->last = 0;
+ vb->size = 0;
+
+ Log("Scanning partitions on thread %d of %d\n", params->thread, params->nthreads);
+ while((partition = VInitNextPartition(pq))) {
+ DIR *dirp;
+ VolId vid;
+
+ Log("Partition %s: pre-attaching volumes\n", partition->name);
+ dirp = opendir(VPartitionPath(partition));
+ if (!dirp) {
+ Log("opendir on Partition %s failed, errno=%d!\n", partition->name, errno);
+ continue;
+ }
+ while ((vid = VInitNextVolumeId(dirp))) {
+ /* build a minimal Volume object for preattach; the consumer
+ * thread inserts it into the hash table */
+ Volume *vp = (Volume*)malloc(sizeof(Volume));
+ osi_Assert(vp);
+ memset(vp, 0, sizeof(Volume));
+ vp->device = partition->device;
+ vp->partition = partition;
+ vp->hashid = vid;
+ queue_Init(&vp->vnode_list);
+ queue_Init(&vp->rx_call_list);
+ CV_INIT(&V_attachCV(vp), "partattach", CV_DEFAULT, 0);
+
+ vb->batch[vb->size++] = vp;
+ if (vb->size == VINIT_BATCH_MAX_SIZE) {
+ /* batch is full: post it and start a fresh one */
+ MUTEX_ENTER(&vq->mutex);
+ queue_Append(vq, vb);
+ CV_BROADCAST(&vq->cv);
+ MUTEX_EXIT(&vq->mutex);
+
+ vb = (struct volume_init_batch*)malloc(sizeof(struct volume_init_batch));
+ osi_Assert(vb);
+ vb->thread = params->thread;
+ vb->size = 0;
+ vb->last = 0;
+ }
+ }
+ closedir(dirp);
+ }
+
+ /* post the final (possibly empty) batch; 'last' tells the consumer
+ * this scanner thread is done */
+ vb->last = 1;
+ MUTEX_ENTER(&vq->mutex);
+ queue_Append(vq, vb);
+ CV_BROADCAST(&vq->cv);
+ MUTEX_EXIT(&vq->mutex);
+
+ Log("Partition scan thread %d of %d ended\n", params->thread, params->nthreads);
+ free(params);
+ return NULL;
+}
+
+/**
+ * Read next element from the pre-populated partition list.
+ */
+static struct DiskPartition64*
+VInitNextPartition(struct partition_queue *pq)
+{
+ struct DiskPartition64 *partition;
+ struct diskpartition_queue_t *dp; /* queue element */
+
+ /* a shutdown request aborts the scan early; returning NULL makes
+ * the caller's scan loop terminate */
+ if (vinit_attach_abort) {
+ Log("Aborting volume preattach thread.\n");
+ return NULL;
+ }
+
+ /* get next partition to scan */
+ MUTEX_ENTER(&pq->mutex);
+ if (queue_IsEmpty(pq)) {
+ MUTEX_EXIT(&pq->mutex);
+ return NULL;
+ }
+ dp = queue_First(pq, diskpartition_queue_t);
+ queue_Remove(dp);
+ MUTEX_EXIT(&pq->mutex);
+
+ osi_Assert(dp);
+ osi_Assert(dp->diskP);
+
+ /* free the queue element but hand back the partition it carried */
+ partition = dp->diskP;
+ free(dp);
+ return partition;
+}
+
+/**
+ * Find next volume id on the partition.
+ */
+static VolId
+VInitNextVolumeId(DIR *dirp)
+{
+ struct dirent *d;
+ VolId vid = 0;
+ char *ext;
+
+ /* scan directory entries for the next valid volume header file
+ * (name of the form V<id> with the VHDREXT extension); returns 0
+ * when the directory is exhausted or an abort was requested */
+ while((d = readdir(dirp))) {
+ if (vinit_attach_abort) {
+ Log("Aborting volume preattach thread.\n");
+ break;
+ }
+ ext = strrchr(d->d_name, '.');
+ if (d->d_name[0] == 'V' && ext && strcmp(ext, VHDREXT) == 0) {
+ vid = VolumeNumber(d->d_name);
+ if (vid) {
+ break;
+ }
+ /* header-like name but no parseable volume number */
+ Log("Warning: bogus volume header file: %s\n", d->d_name);
+ }
+ }
+ return vid;
+}
+
+/**
+ * Preattach volumes in batches to avoid lock contention.
+ *
+ * Runs in the initiating thread; dequeues batches of volume objects
+ * posted by the partition-scanner threads and brings each one up to
+ * the pre-attached state under VOL_LOCK.
+ *
+ * @param[in] nthreads  number of scanner threads still running; each
+ *                      one posts a final batch with vb->last set
+ * @param[in] vq        queue of volume_init_batch elements
+ *
+ * @return operation status (always 0)
+ */
+static int
+VInitPreAttachVolumes(int nthreads, struct volume_init_queue *vq)
+{
+ struct volume_init_batch *vb;
+ int i;
+
+ while (nthreads) {
+ /* dequeue next volume batch; re-check emptiness after each
+ * wakeup, since POSIX condvars permit spurious wakeups and a
+ * plain 'if' could dereference an empty queue via queue_First */
+ MUTEX_ENTER(&vq->mutex);
+ while (queue_IsEmpty(vq)) {
+ CV_WAIT(&vq->cv, &vq->mutex);
+ }
+ vb = queue_First(vq, volume_init_batch);
+ queue_Remove(vb);
+ MUTEX_EXIT(&vq->mutex);
+
+ if (vb->size) {
+ VOL_LOCK;
+ for (i = 0; i<vb->size; i++) {
+ Volume *vp;
+ Volume *dup;
+ Error ec = 0;
+
+ vp = vb->batch[i];
+ dup = VLookupVolume_r(&ec, vp->hashid, NULL);
+ if (ec) {
+ Log("Error looking up volume, code=%d\n", ec);
+ }
+ else if (dup) {
+ /* %u: hashid is an unsigned 32-bit volume id */
+ Log("Warning: Duplicate volume id %u detected.\n", vp->hashid);
+ /* NOTE(review): vp is leaked on the ec/dup paths; a
+ * one-time startup loss, but worth confirming */
+ }
+ else {
+ /* put pre-attached volume onto the hash table
+ * and bring it up to the pre-attached state */
+ AddVolumeToHashTable(vp, vp->hashid);
+ AddVolumeToVByPList_r(vp);
+ VLRU_Init_Node_r(vp);
+ VChangeState_r(vp, VOL_STATE_PREATTACHED);
+ }
+ }
+ VOL_UNLOCK;
+ }
+
+ /* a batch with 'last' set is a scanner thread's sign-off */
+ if (vb->last) {
+ nthreads--;
+ }
+ free(vb);
+ }
+ return 0;
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+#if !defined(AFS_DEMAND_ATTACH_FS)
/*
* attach all volumes on a given disk partition
*/
if (p != NULL && strcmp(p, VHDREXT) == 0) {
Error error;
Volume *vp;
-#ifdef AFS_DEMAND_ATTACH_FS
- vp = VPreAttachVolumeByName(&error, diskP->name, dp->d_name);
-#else /* AFS_DEMAND_ATTACH_FS */
vp = VAttachVolumeByName(&error, diskP->name, dp->d_name,
V_VOLUPD);
-#endif /* AFS_DEMAND_ATTACH_FS */
(*(vp ? nAttached : nUnattached))++;
if (error == VOFFLINE)
Log("Volume %d stays offline (/vice/offline/%s exists)\n", VolumeNumber(dp->d_name), dp->d_name);
diskP->name, VolumeNumber(dp->d_name),
dp->d_name);
}
-#if !defined(AFS_DEMAND_ATTACH_FS)
if (vp) {
VPutVolume(vp);
}
-#endif /* AFS_DEMAND_ATTACH_FS */
}
}
closedir(dirp);
return ret;
}
-
+#endif /* !AFS_DEMAND_ATTACH_FS */
/***************************************************/
/* Shutdown routines */
for (params.n_parts=0, diskP = DiskPartitionList;
diskP; diskP = diskP->next, params.n_parts++);
- Log("VShutdown: shutting down on-line volumes on %d partition%s...\n",
+ Log("VShutdown: shutting down on-line volumes on %d partition%s...\n",
params.n_parts, params.n_parts > 1 ? "s" : "");
+ vol_shutting_down = 1;
+
if (vol_attach_threads > 1) {
/* prepare for parallel shutdown */
params.n_threads = vol_attach_threads;
- assert(pthread_mutex_init(¶ms.lock, NULL) == 0);
- assert(pthread_cond_init(¶ms.cv, NULL) == 0);
- assert(pthread_cond_init(¶ms.master_cv, NULL) == 0);
- assert(pthread_attr_init(&attrs) == 0);
- assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
+ MUTEX_INIT(¶ms.lock, "params", MUTEX_DEFAULT, 0);
+ CV_INIT(¶ms.cv, "params", CV_DEFAULT, 0);
+ CV_INIT(¶ms.master_cv, "params master", CV_DEFAULT, 0);
+ osi_Assert(pthread_attr_init(&attrs) == 0);
+ osi_Assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
queue_Init(¶ms);
/* setup the basic partition information structures for
}
Log("VShutdown: partition %s has %d volumes with attached headers\n",
VPartitionPath(diskP), count);
-
+
/* build up the pass 0 shutdown work queue */
dpq = (struct diskpartition_queue_t *) malloc(sizeof(struct diskpartition_queue_t));
- assert(dpq != NULL);
+ osi_Assert(dpq != NULL);
dpq->diskP = diskP;
queue_Prepend(¶ms, dpq);
vol_attach_threads, params.n_parts, params.n_parts > 1 ? "s" : "" );
/* do pass 0 shutdown */
- assert(pthread_mutex_lock(¶ms.lock) == 0);
+ MUTEX_ENTER(¶ms.lock);
for (i=0; i < params.n_threads; i++) {
- assert(pthread_create
+ osi_Assert(pthread_create
(&tid, &attrs, &VShutdownThread,
¶ms) == 0);
}
-
+
/* wait for all the pass 0 shutdowns to complete */
while (params.n_threads_complete < params.n_threads) {
- assert(pthread_cond_wait(¶ms.master_cv, ¶ms.lock) == 0);
+ CV_WAIT(¶ms.master_cv, ¶ms.lock);
}
params.n_threads_complete = 0;
params.pass = 1;
- assert(pthread_cond_broadcast(¶ms.cv) == 0);
- assert(pthread_mutex_unlock(¶ms.lock) == 0);
+ CV_BROADCAST(¶ms.cv);
+ MUTEX_EXIT(¶ms.lock);
Log("VShutdown: pass 0 completed using the 1 thread per partition algorithm\n");
Log("VShutdown: starting passes 1 through 3 using finely-granular mp-fast algorithm\n");
/* run the parallel shutdown scheduler. it will drop the glock internally */
ShutdownController(¶ms);
-
+
/* wait for all the workers to finish pass 3 and terminate */
while (params.pass < 4) {
VOL_CV_WAIT(¶ms.cv);
}
-
- assert(pthread_attr_destroy(&attrs) == 0);
- assert(pthread_cond_destroy(¶ms.cv) == 0);
- assert(pthread_cond_destroy(¶ms.master_cv) == 0);
- assert(pthread_mutex_destroy(¶ms.lock) == 0);
+
+ osi_Assert(pthread_attr_destroy(&attrs) == 0);
+ CV_DESTROY(¶ms.cv);
+ CV_DESTROY(¶ms.master_cv);
+ MUTEX_DESTROY(¶ms.lock);
/* drop the VByPList exclusive reservations */
for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
VShutdown_r(void)
{
int i;
- register Volume *vp, *np;
- register afs_int32 code;
+ Volume *vp, *np;
+ afs_int32 code;
if (VInit < 2) {
Log("VShutdown: aborting attach volumes\n");
}
Log("VShutdown: shutting down on-line volumes...\n");
+ vol_shutting_down = 1;
for (i = 0; i < VolumeHashTable.Size; i++) {
/* try to hold first volume in the hash table */
for (queue_Scan(&VolumeHashTable.Table[i],vp,np,Volume)) {
if (LogLevel >= 5)
Log("VShutdown: Attempting to take volume %u offline.\n",
vp->hashid);
-
+
/* next, take the volume offline (drops reference count) */
VOffline_r(vp, "File server was shut down");
}
void
VShutdown(void)
{
- assert(VInit>0);
+ osi_Assert(VInit>0);
VOL_LOCK;
VShutdown_r();
VOL_UNLOCK;
for (diskP = DiskPartitionList; diskP; diskP=diskP->next) {
id = diskP->index;
Log("ShutdownController: part[%d] : (len=%d, thread_target=%d, done_pass=%d, pass_head=%p)\n",
- id,
+ id,
diskP->vol_list.len,
- shadow.part_thread_target[id],
- shadow.part_done_pass[id],
+ shadow.part_thread_target[id],
+ shadow.part_done_pass[id],
shadow.part_pass_head[id]);
}
/* create the shutdown thread work schedule.
* this scheduler tries to implement fairness
- * by allocating at least 1 thread to each
+ * by allocating at least 1 thread to each
* partition with volumes to be shutdown,
* and then it attempts to allocate remaining
* threads based upon the amount of work left
for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
sum += diskP->vol_list.len;
}
-
+
params->schedule_version++;
params->vol_remaining = sum;
/* compute the residues */
for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
id = diskP->index;
- part_residue[id] = diskP->vol_list.len -
+ part_residue[id] = diskP->vol_list.len -
(params->part_thread_target[id] * thr_workload);
}
params = (vshutdown_thread_t *) args;
/* acquire the shutdown pass 0 lock */
- assert(pthread_mutex_lock(¶ms->lock) == 0);
+ MUTEX_ENTER(¶ms->lock);
/* if there's still pass 0 work to be done,
* get a work entry, and do a pass 0 shutdown */
if (queue_IsNotEmpty(params)) {
dpq = queue_First(params, diskpartition_queue_t);
queue_Remove(dpq);
- assert(pthread_mutex_unlock(¶ms->lock) == 0);
+ MUTEX_EXIT(¶ms->lock);
diskP = dpq->diskP;
free(dpq);
id = diskP->index;
while (ShutdownVolumeWalk_r(diskP, 0, ¶ms->part_pass_head[id]))
count++;
params->stats[0][diskP->index] = count;
- assert(pthread_mutex_lock(¶ms->lock) == 0);
+ MUTEX_ENTER(¶ms->lock);
}
params->n_threads_complete++;
if (params->n_threads_complete == params->n_threads) {
- /* notify control thread that all workers have completed pass 0 */
- assert(pthread_cond_signal(¶ms->master_cv) == 0);
+ /* notify control thread that all workers have completed pass 0 */
+ CV_SIGNAL(¶ms->master_cv);
}
while (params->pass == 0) {
- assert(pthread_cond_wait(¶ms->cv, ¶ms->lock) == 0);
+ CV_WAIT(¶ms->cv, ¶ms->lock);
}
/* switch locks */
- assert(pthread_mutex_unlock(¶ms->lock) == 0);
+ MUTEX_EXIT(¶ms->lock);
VOL_LOCK;
pass = params->pass;
- assert(pass > 0);
+ osi_Assert(pass > 0);
/* now escalate through the more complicated shutdowns */
while (pass <= 3) {
break;
}
}
-
+
if (!found) {
- /* hmm. for some reason the controller thread couldn't find anything for
+ /* hmm. for some reason the controller thread couldn't find anything for
* us to do. let's see if there's anything we can do */
for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
id = diskP->index;
}
}
}
-
+
/* do work on this partition until either the controller
* creates a new schedule, or we run out of things to do
* on this partition */
ShutdownCreateSchedule(params);
/* wake up all the workers */
- assert(pthread_cond_broadcast(¶ms->cv) == 0);
+ CV_BROADCAST(¶ms->cv);
VOL_UNLOCK;
Log("VShutdown: pass %d completed using %d threads on %d partitions\n",
}
pass = params->pass;
}
-
+
/* for fairness */
VOL_UNLOCK;
pthread_yield();
return NULL;
}
-/* shut down all volumes on a given disk partition
+/* shut down all volumes on a given disk partition
*
* note that this function will not allow mp-fast
* shutdown of a partition */
VVByPListBeginExclusive_r(dp);
/* pick the low-hanging fruit first,
- * then do the complicated ones last
+ * then do the complicated ones last
* (has the advantage of keeping
* in-use volumes up until the bitter end) */
for (pass = 0, total=0; pass < 4; pass++) {
* 0 to only "shutdown" {pre,un}attached and error state volumes
* 1 to also shutdown attached volumes w/ volume header loaded
* 2 to also shutdown attached volumes w/o volume header loaded
- * 3 to also shutdown exclusive state volumes
+ * 3 to also shutdown exclusive state volumes
*
* caller MUST hold exclusive access on the hash chain
* because we drop vol_glock_mutex internally
- *
- * this function is reentrant for passes 1--3
- * (e.g. multiple threads can cooperate to
+ *
+ * this function is reentrant for passes 1--3
+ * (e.g. multiple threads can cooperate to
* shutdown a partition mp-fast)
*
* pass 0 is not scaleable because the volume state data is
ShutdownVByPForPass_r(struct DiskPartition64 * dp, int pass)
{
struct rx_queue * q = queue_First(&dp->vol_list, rx_queue);
- register int i = 0;
+ int i = 0;
while (ShutdownVolumeWalk_r(dp, pass, &q))
i++;
for (queue_ScanFrom(&dp->vol_list, qp, qp, nqp, rx_queue)) {
vp = (Volume *) (((char *)qp) - offsetof(Volume, vol_list));
-
+
switch (pass) {
case 0:
if ((V_attachState(vp) != VOL_STATE_UNATTACHED) &&
(V_attachState(vp) != VOL_STATE_ERROR) &&
+ (V_attachState(vp) != VOL_STATE_DELETED) &&
(V_attachState(vp) != VOL_STATE_PREATTACHED)) {
break;
}
/* wait for other blocking ops to finish */
VWaitExclusiveState_r(vp);
- assert(VIsValidState(V_attachState(vp)));
-
+ osi_Assert(VIsValidState(V_attachState(vp)));
+
switch(V_attachState(vp)) {
case VOL_STATE_SALVAGING:
/* Leave salvaging volumes alone. Any in-progress salvages will
case VOL_STATE_ERROR:
VChangeState_r(vp, VOL_STATE_UNATTACHED);
case VOL_STATE_UNATTACHED:
+ case VOL_STATE_DELETED:
break;
case VOL_STATE_GOING_OFFLINE:
case VOL_STATE_SHUTTING_DOWN:
default:
break;
}
-
+
VCancelReservation_r(vp);
vp = NULL;
return 0;
return;
}
- if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
- *ec = VSALVAGE;
- FDH_REALLYCLOSE(fdP);
- return;
- }
vsn = (struct versionStamp *)to;
- if (FDH_READ(fdP, to, size) != size || vsn->magic != magic) {
+ if (FDH_PREAD(fdP, to, size, 0) != size || vsn->magic != magic) {
*ec = VSALVAGE;
FDH_REALLYCLOSE(fdP);
return;
*ec = VSALVAGE;
return;
}
- if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
- *ec = VSALVAGE;
- FDH_REALLYCLOSE(fdP);
- return;
- }
- if (FDH_WRITE(fdP, (char *)&V_disk(vp), sizeof(V_disk(vp)))
+ if (FDH_PWRITE(fdP, (char *)&V_disk(vp), sizeof(V_disk(vp)), 0)
!= sizeof(V_disk(vp))) {
*ec = VSALVAGE;
FDH_REALLYCLOSE(fdP);
* Converts an on-disk representation of a volume header to
* the in-memory representation of a volume header.
*
- * Makes the assumption that AFS has *always*
+ * Makes the assumption that AFS has *always*
* zero'd the volume header file so that high parts of inode
* numbers are 0 in older (SGI EFS) volume header files.
*/
* @return volume object pointer
*
* @note A pre-attached volume will only have its partition
- * and hashid fields initialized. At first call to
+ * and hashid fields initialized. At first call to
* VGetVolume, the volume will be fully attached.
*
*/
Volume *
VPreAttachVolumeByName_r(Error * ec, char *partition, char *name)
{
- return VPreAttachVolumeById_r(ec,
+ return VPreAttachVolumeById_r(ec,
partition,
VolumeNumber(name));
}
* @internal volume package internal use only.
*/
Volume *
-VPreAttachVolumeById_r(Error * ec,
+VPreAttachVolumeById_r(Error * ec,
char * partition,
VolId volumeId)
{
*ec = 0;
- assert(programType == fileServer);
+ osi_Assert(programType == fileServer);
if (!(partp = VGetPartition_r(partition, 0))) {
*ec = VNOVOL;
* properly in this case.
*
* @note If there is already a volume object registered with
- * the same volume id, its pointer MUST be passed as
+ * the same volume id, its pointer MUST be passed as
* argument vp. Failure to do so will result in a silent
* failure to preattach.
*
* @internal volume package internal use only.
*/
-Volume *
-VPreAttachVolumeByVp_r(Error * ec,
- struct DiskPartition64 * partp,
+Volume *
+VPreAttachVolumeByVp_r(Error * ec,
+ struct DiskPartition64 * partp,
Volume * vp,
VolId vid)
{
*ec = 0;
/* check to see if pre-attach already happened */
- if (vp &&
- (V_attachState(vp) != VOL_STATE_UNATTACHED) &&
+ if (vp &&
+ (V_attachState(vp) != VOL_STATE_UNATTACHED) &&
+ (V_attachState(vp) != VOL_STATE_DELETED) &&
(V_attachState(vp) != VOL_STATE_PREATTACHED) &&
!VIsErrorState(V_attachState(vp))) {
/*
/* allocate the volume structure */
vp = nvp = (Volume *) malloc(sizeof(Volume));
- assert(vp != NULL);
+ osi_Assert(vp != NULL);
memset(vp, 0, sizeof(Volume));
queue_Init(&vp->vnode_list);
- assert(pthread_cond_init(&V_attachCV(vp), NULL) == 0);
+ queue_Init(&vp->rx_call_list);
+ CV_INIT(&V_attachCV(vp), "vp attach", CV_DEFAULT, 0);
}
/* link the volume with its associated vice partition */
vp = nvp;
goto done;
} else {
- /* hack to make up for VChangeState_r() decrementing
+ /* hack to make up for VChangeState_r() decrementing
* the old state counter */
VStats.state_levels[0]++;
}
Volume *
VAttachVolumeByName_r(Error * ec, char *partition, char *name, int mode)
{
- register Volume *vp = NULL;
- int fd, n;
- struct afs_stat status;
- struct VolumeDiskHeader diskHeader;
- struct VolumeHeader iheader;
+ Volume *vp = NULL;
struct DiskPartition64 *partp;
char path[64];
int isbusy = 0;
VolId volumeId;
+ int checkedOut;
#ifdef AFS_DEMAND_ATTACH_FS
VolumeStats stats_save;
Volume *svp = NULL;
#endif /* AFS_DEMAND_ATTACH_FS */
*ec = 0;
-
+
volumeId = VolumeNumber(name);
if (!(partp = VGetPartition_r(partition, 0))) {
}
if (VRequiresPartLock()) {
- assert(VInit == 3);
+ osi_Assert(VInit == 3);
VLockPartition_r(partition);
} else if (programType == fileServer) {
#ifdef AFS_DEMAND_ATTACH_FS
* - GOING_OFFLINE
* - SALVAGING
* - ERROR
+ * - DELETED
*/
if (vp->specialStatus == VBUSY)
isbusy = 1;
-
+
/* if it's already attached, see if we can return it */
if (V_attachState(vp) == VOL_STATE_ATTACHED) {
VGetVolumeByVp_r(ec, vp);
}
/* pre-attach volume if it hasn't been done yet */
- if (!vp ||
+ if (!vp ||
(V_attachState(vp) == VOL_STATE_UNATTACHED) ||
+ (V_attachState(vp) == VOL_STATE_DELETED) ||
(V_attachState(vp) == VOL_STATE_ERROR)) {
svp = vp;
vp = VPreAttachVolumeByVp_r(ec, partp, vp, volumeId);
}
}
- assert(vp != NULL);
+ osi_Assert(vp != NULL);
- /* handle pre-attach races
+ /* handle pre-attach races
*
* multiple threads can race to pre-attach a volume,
* but we can't let them race beyond that
- *
+ *
* our solution is to let the first thread to bring
* the volume into an exclusive state win; the other
* threads just wait until it finishes bringing the
VOL_UNLOCK;
- strcat(path, "/");
+ strcat(path, OS_DIRSEP);
strcat(path, name);
- if ((fd = afs_open(path, O_RDONLY)) == -1 || afs_fstat(fd, &status) == -1) {
- Log("VAttachVolume: Failed to open %s (errno %d)\n", path, errno);
- if (fd > -1)
- close(fd);
- *ec = VNOVOL;
- VOL_LOCK;
- goto done;
- }
- n = read(fd, &diskHeader, sizeof(diskHeader));
- close(fd);
- if (n != sizeof(diskHeader)
- || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
- Log("VAttachVolume: Error reading volume header %s\n", path);
- *ec = VSALVAGE;
- VOL_LOCK;
- goto done;
- }
- if (diskHeader.stamp.version != VOLUMEHEADERVERSION) {
- Log("VAttachVolume: Volume %s, version number is incorrect; volume needs salvaged\n", path);
- *ec = VSALVAGE;
- VOL_LOCK;
- goto done;
- }
-
- DiskToVolumeHeader(&iheader, &diskHeader);
-#ifdef FSSYNC_BUILD_CLIENT
- if (VCanUseFSSYNC() && mode != V_SECRETLY && mode != V_PEEK) {
- SYNC_response res;
- memset(&res, 0, sizeof(res));
-
- VOL_LOCK;
- if (FSYNC_VolOp(iheader.id, partition, FSYNC_VOL_NEEDVOLUME, mode, &res)
- != SYNC_OK) {
-
- if (res.hdr.reason == FSYNC_SALVAGE) {
- Log("VAttachVolume: file server says volume %u is salvaging\n",
- iheader.id);
- *ec = VSALVAGING;
- } else {
- Log("VAttachVolume: attach of volume %u apparently denied by file server\n",
- iheader.id);
- *ec = VNOVOL; /* XXXX */
- }
-
- goto done;
- }
- VOL_UNLOCK;
- }
-#endif
if (!vp) {
vp = (Volume *) calloc(1, sizeof(Volume));
- assert(vp != NULL);
+ osi_Assert(vp != NULL);
vp->hashid = volumeId;
vp->device = partp->device;
vp->partition = partp;
queue_Init(&vp->vnode_list);
+ queue_Init(&vp->rx_call_list);
#ifdef AFS_DEMAND_ATTACH_FS
- assert(pthread_cond_init(&V_attachCV(vp), NULL) == 0);
+ CV_INIT(&V_attachCV(vp), "vp attach", CV_DEFAULT, 0);
#endif /* AFS_DEMAND_ATTACH_FS */
}
/* attach2 is entered without any locks, and returns
* with vol_glock_mutex held */
- vp = attach2(ec, volumeId, path, &iheader, partp, vp, isbusy, mode);
+ vp = attach2(ec, volumeId, path, partp, vp, isbusy, mode, &checkedOut);
if (VCanUseFSSYNC() && vp) {
+#ifdef AFS_DEMAND_ATTACH_FS
if ((mode == V_VOLUPD) || (VolumeWriteable(vp) && (mode == V_CLONE))) {
/* mark volume header as in use so that volser crashes lead to a
* salvage attempt */
VUpdateVolume_r(ec, vp, 0);
}
-#ifdef AFS_DEMAND_ATTACH_FS
/* for dafs, we should tell the fileserver, except for V_PEEK
* where we know it is not necessary */
if (mode == V_PEEK) {
vp->needsPutBack = 0;
} else {
- vp->needsPutBack = 1;
+ vp->needsPutBack = VOL_PUTBACK;
}
#else /* !AFS_DEMAND_ATTACH_FS */
/* duplicate computation in fssync.c about whether the server
|| (!VolumeWriteable(vp) && (mode == V_CLONE || mode == V_DUMP)))
vp->needsPutBack = 0;
else
- vp->needsPutBack = 1;
+ vp->needsPutBack = VOL_PUTBACK;
#endif /* !AFS_DEMAND_ATTACH_FS */
}
- /* OK, there's a problem here, but one that I don't know how to
- * fix right now, and that I don't think should arise often.
- * Basically, we should only put back this volume to the server if
- * it was given to us by the server, but since we don't have a vp,
- * we can't run the VolumeWriteable function to find out as we do
- * above when computing vp->needsPutBack. So we send it back, but
- * there's a path in VAttachVolume on the server which may abort
- * if this volume doesn't have a header. Should be pretty rare
- * for all of that to happen, but if it does, probably the right
- * fix is for the server to allow the return of readonly volumes
- * that it doesn't think are really checked out. */
#ifdef FSSYNC_BUILD_CLIENT
- if (VCanUseFSSYNC() && vp == NULL &&
- mode != V_SECRETLY && mode != V_PEEK) {
+ /* Only give back the vol to the fileserver if we checked it out; attach2
+ * will set checkedOut only if we successfully checked it out from the
+ * fileserver. */
+ if (VCanUseFSSYNC() && vp == NULL && checkedOut) {
#ifdef AFS_DEMAND_ATTACH_FS
/* If we couldn't attach but we scheduled a salvage, we already
* notified the fileserver; don't online it now */
if (*ec != VSALVAGING)
#endif /* AFS_DEMAND_ATTACH_FS */
- FSYNC_VolOp(iheader.id, partition, FSYNC_VOL_ON, 0, NULL);
- } else
+ FSYNC_VolOp(volumeId, partition, FSYNC_VOL_ON, 0, NULL);
+ } else
#endif
if (programType == fileServer && vp) {
#ifdef AFS_DEMAND_ATTACH_FS
- /*
+ /*
* we can get here in cases where we don't "own"
* the volume (e.g. volume owned by a utility).
* short circuit around potential disk header races.
goto done;
}
#endif
- V_needsCallback(vp) = 0;
-#ifdef notdef
- if (VInit >= 2 && V_BreakVolumeCallbacks) {
- Log("VAttachVolume: Volume %u was changed externally; breaking callbacks\n", V_id(vp));
- (*V_BreakVolumeCallbacks) (V_id(vp));
- }
-#endif
VUpdateVolume_r(ec, vp, 0);
if (*ec) {
Log("VAttachVolume: Error updating volume\n");
VAttachVolumeByVp_r(Error * ec, Volume * vp, int mode)
{
char name[VMAXPATHLEN];
- int fd, n, reserve = 0;
- struct afs_stat status;
- struct VolumeDiskHeader diskHeader;
- struct VolumeHeader iheader;
+ int reserve = 0;
struct DiskPartition64 *partp;
char path[64];
int isbusy = 0;
VolId volumeId;
Volume * nvp = NULL;
VolumeStats stats_save;
+ int checkedOut;
*ec = 0;
/* volume utility should never call AttachByVp */
- assert(programType == fileServer);
-
+ osi_Assert(programType == fileServer);
+
volumeId = vp->hashid;
partp = vp->partition;
VolumeExternalName_r(volumeId, name, sizeof(name));
}
/* pre-attach volume if it hasn't been done yet */
- if (!vp ||
+ if (!vp ||
(V_attachState(vp) == VOL_STATE_UNATTACHED) ||
+ (V_attachState(vp) == VOL_STATE_DELETED) ||
(V_attachState(vp) == VOL_STATE_ERROR)) {
nvp = VPreAttachVolumeByVp_r(ec, partp, vp, volumeId);
if (*ec) {
vp = nvp;
}
}
-
- assert(vp != NULL);
+
+ osi_Assert(vp != NULL);
VChangeState_r(vp, VOL_STATE_ATTACHING);
/* restore monotonically increasing stats */
*ec = 0;
-
- /* compute path to disk header,
- * read in header,
- * and verify magic and version stamps */
+ /* compute path to disk header */
strcpy(path, VPartitionPath(partp));
VOL_UNLOCK;
- strcat(path, "/");
+ strcat(path, OS_DIRSEP);
strcat(path, name);
- if ((fd = afs_open(path, O_RDONLY)) == -1 || afs_fstat(fd, &status) == -1) {
- Log("VAttachVolume: Failed to open %s (errno %d)\n", path, errno);
- if (fd > -1)
- close(fd);
- *ec = VNOVOL;
- VOL_LOCK;
- goto done;
- }
- n = read(fd, &diskHeader, sizeof(diskHeader));
- close(fd);
- if (n != sizeof(diskHeader)
- || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
- Log("VAttachVolume: Error reading volume header %s\n", path);
- *ec = VSALVAGE;
- VOL_LOCK;
- goto done;
- }
- if (diskHeader.stamp.version != VOLUMEHEADERVERSION) {
- Log("VAttachVolume: Volume %s, version number is incorrect; volume needs salvaged\n", path);
- *ec = VSALVAGE;
- VOL_LOCK;
- goto done;
- }
-
- /* convert on-disk header format to in-memory header format */
- DiskToVolumeHeader(&iheader, &diskHeader);
/* do volume attach
*
* NOTE: attach2 is entered without any locks, and returns
* with vol_glock_mutex held */
- vp = attach2(ec, volumeId, path, &iheader, partp, vp, isbusy, mode);
+ vp = attach2(ec, volumeId, path, partp, vp, isbusy, mode, &checkedOut);
/*
* the event that an error was encountered, or
* for any reason, skip to the end. We cannot
* safely call VUpdateVolume unless we "own" it.
*/
- if (*ec ||
+ if (*ec ||
(vp == NULL) ||
(V_attachState(vp) != VOL_STATE_ATTACHED)) {
goto done;
}
- V_needsCallback(vp) = 0;
VUpdateVolume_r(ec, vp, 0);
if (*ec) {
Log("VAttachVolume: Error updating volume %u\n", vp->hashid);
return vp;
}
}
+
+/**
+ * lock a volume on disk (non-blocking).
+ *
+ * @param[in] vp The volume to lock
+ * @param[in] locktype READ_LOCK or WRITE_LOCK
+ *
+ * @return operation status
+ * @retval 0 success, lock was obtained
+ * @retval EBUSY a conflicting lock was held by another process
+ * @retval EIO error acquiring lock
+ *
+ * @pre If we're in the fileserver, vp is in an exclusive state
+ *
+ * @pre vp is not already locked
+ */
+static int
+VLockVolumeNB(Volume *vp, int locktype)
+{
+ int code;
+
+ /* precondition checks: in the fileserver the volume must be in an
+  * exclusive state, and the in-memory VOL_LOCKED flag must not already
+  * be set (this function must not be called on an already-locked vp) */
+ osi_Assert(programType != fileServer || VIsExclusiveState(V_attachState(vp)));
+ osi_Assert(!(V_attachFlags(vp) & VOL_LOCKED));
+
+ code = VLockVolumeByIdNB(vp->hashid, vp->partition, locktype);
+ if (code == 0) {
+ /* record that we hold the on-disk lock; VUnlockVolume asserts and
+  * clears this flag when the lock is released */
+ V_attachFlags(vp) |= VOL_LOCKED;
+ }
+
+ return code;
+}
+
+/**
+ * unlock a volume on disk that was locked with VLockVolumeNB.
+ *
+ * @param[in] vp volume to unlock
+ *
+ * @pre If we're in the fileserver, vp is in an exclusive state
+ *
+ * @pre vp has already been locked
+ */
+static void
+VUnlockVolume(Volume *vp)
+{
+ /* precondition checks mirror VLockVolumeNB: exclusive state in the
+  * fileserver, and VOL_LOCKED must have been set by a prior lock */
+ osi_Assert(programType != fileServer || VIsExclusiveState(V_attachState(vp)));
+ osi_Assert((V_attachFlags(vp) & VOL_LOCKED));
+
+ VUnlockVolumeById(vp->hashid, vp->partition);
+
+ /* clear the in-memory flag now that the on-disk lock is dropped */
+ V_attachFlags(vp) &= ~VOL_LOCKED;
+}
#endif /* AFS_DEMAND_ATTACH_FS */
-/*
- * called without any locks held
- * returns with vol_glock_mutex held
- */
-private Volume *
-attach2(Error * ec, VolId volumeId, char *path, register struct VolumeHeader * header,
- struct DiskPartition64 * partp, register Volume * vp, int isbusy, int mode)
-{
- vp->specialStatus = (byte) (isbusy ? VBUSY : 0);
- IH_INIT(vp->vnodeIndex[vLarge].handle, partp->device, header->parent,
- header->largeVnodeIndex);
- IH_INIT(vp->vnodeIndex[vSmall].handle, partp->device, header->parent,
- header->smallVnodeIndex);
- IH_INIT(vp->diskDataHandle, partp->device, header->parent,
- header->volumeInfo);
- IH_INIT(vp->linkHandle, partp->device, header->parent, header->linkTable);
- vp->shuttingDown = 0;
- vp->goingOffline = 0;
- vp->nUsers = 1;
+/**
+ * read in a vol header, possibly lock the vol header, and possibly check out
+ * the vol header from the fileserver, as part of volume attachment.
+ *
+ * @param[out] ec error code
+ * @param[in] vp volume pointer object
+ * @param[in] partp disk partition object of the attaching partition
+ * @param[in] mode attachment mode such as V_VOLUPD, V_DUMP, etc (see
+ * volume.h)
+ * @param[in] peek 1 to just try to read in the volume header and make sure
+ * we don't try to lock the vol, or check it out from
+ * FSSYNC or anything like that; 0 otherwise, for 'normal'
+ * operation
+ * @param[out] acheckedOut If we successfully checked-out the volume from
+ * the fileserver (if we needed to), this is set
+ * to 1, otherwise it is untouched.
+ *
+ * @note As part of DAFS volume attachment, the volume header may be either
+ * read- or write-locked to ensure mutual exclusion of certain volume
+ * operations. In some cases in order to determine whether we need to
+ * read- or write-lock the header, we need to read in the header to see
+ * if the volume is RW or not. So, if we read in the header under a
+ * read-lock and determine that we actually need a write-lock on the
+ * volume header, this function will drop the read lock, acquire a write
+ * lock, and read the header in again.
+ */
+static void
+attach_volume_header(Error *ec, Volume *vp, struct DiskPartition64 *partp,
+ int mode, int peek, int *acheckedOut)
+{
+ struct VolumeDiskHeader diskHeader;
+ struct VolumeHeader header;
+ int code;
+ int first_try = 1;
+ int lock_tries = 0, checkout_tries = 0;
+ int retry;
+ VolumeId volid = vp->hashid;
+#ifdef FSSYNC_BUILD_CLIENT
+ int checkout, done_checkout = 0;
+#endif /* FSSYNC_BUILD_CLIENT */
#ifdef AFS_DEMAND_ATTACH_FS
- vp->stats.last_attach = FT_ApproxTime();
- vp->stats.attaches++;
+ int locktype = 0, use_locktype = -1;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ retry:
+ retry = 0;
+ *ec = 0;
+
+ if (lock_tries > VOL_MAX_CHECKOUT_RETRIES) {
+ Log("VAttachVolume: retried too many times trying to lock header for "
+ "vol %lu part %s; giving up\n", afs_printable_uint32_lu(volid),
+ VPartitionPath(partp));
+ *ec = VNOVOL;
+ goto done;
+ }
+ if (checkout_tries > VOL_MAX_CHECKOUT_RETRIES) {
+ Log("VAttachVolume: retried too many times trying to checkout "
+ "vol %lu part %s; giving up\n", afs_printable_uint32_lu(volid),
+ VPartitionPath(partp));
+ *ec = VNOVOL;
+ goto done;
+ }
+
+ if (VReadVolumeDiskHeader(volid, partp, NULL)) {
+ /* short-circuit the 'volume does not exist' case */
+ *ec = VNOVOL;
+ goto done;
+ }
+
+#ifdef FSSYNC_BUILD_CLIENT
+ checkout = !done_checkout;
+ done_checkout = 1;
+ if (!peek && checkout && VMustCheckoutVolume(mode)) {
+ SYNC_response res;
+ memset(&res, 0, sizeof(res));
+
+ if (FSYNC_VolOp(volid, partp->name, FSYNC_VOL_NEEDVOLUME, mode, &res)
+ != SYNC_OK) {
+
+ if (res.hdr.reason == FSYNC_SALVAGE) {
+ Log("VAttachVolume: file server says volume %lu is salvaging\n",
+ afs_printable_uint32_lu(volid));
+ *ec = VSALVAGING;
+ } else {
+ Log("VAttachVolume: attach of volume %lu apparently denied by file server\n",
+ afs_printable_uint32_lu(volid));
+ *ec = VNOVOL; /* XXXX */
+ }
+ goto done;
+ }
+ *acheckedOut = 1;
+ }
#endif
- VOL_LOCK;
- IncUInt64(&VStats.attaches);
- vp->cacheCheck = ++VolumeCacheCheck;
- /* just in case this ever rolls over */
- if (!vp->cacheCheck)
- vp->cacheCheck = ++VolumeCacheCheck;
- GetVolumeHeader(vp);
- VOL_UNLOCK;
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (use_locktype < 0) {
+ /* don't know whether vol is RO or RW; assume it's RO and we can retry
+ * if it turns out to be RW */
+ locktype = VVolLockType(mode, 0);
+
+ } else {
+ /* a previous try says we should use use_locktype to lock the volume,
+ * so use that */
+ locktype = use_locktype;
+ }
+
+ if (!peek && locktype) {
+ code = VLockVolumeNB(vp, locktype);
+ if (code) {
+ if (code == EBUSY) {
+ Log("VAttachVolume: another program has vol %lu locked\n",
+ afs_printable_uint32_lu(volid));
+ } else {
+ Log("VAttachVolume: error %d trying to lock vol %lu\n",
+ code, afs_printable_uint32_lu(volid));
+ }
+
+ *ec = VNOVOL;
+ goto done;
+ }
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ code = VReadVolumeDiskHeader(volid, partp, &diskHeader);
+ if (code) {
+ if (code == EIO) {
+ *ec = VSALVAGE;
+ } else {
+ *ec = VNOVOL;
+ }
+ goto done;
+ }
+
+ DiskToVolumeHeader(&header, &diskHeader);
+
+ IH_INIT(vp->vnodeIndex[vLarge].handle, partp->device, header.parent,
+ header.largeVnodeIndex);
+ IH_INIT(vp->vnodeIndex[vSmall].handle, partp->device, header.parent,
+ header.smallVnodeIndex);
+ IH_INIT(vp->diskDataHandle, partp->device, header.parent,
+ header.volumeInfo);
+ IH_INIT(vp->linkHandle, partp->device, header.parent, header.linkTable);
+
+ if (first_try) {
+ /* only need to do this once */
+ VOL_LOCK;
+ GetVolumeHeader(vp);
+ VOL_UNLOCK;
+ }
#if defined(AFS_DEMAND_ATTACH_FS) && defined(FSSYNC_BUILD_CLIENT)
/* demand attach changes the V_PEEK mechanism
* to demand attach fileservers. However, I'm trying
* to limit the number of common code changes)
*/
- if (programType != fileServer && mode == V_PEEK) {
+ if (VCanUseFSSYNC() && (mode == V_PEEK || peek)) {
SYNC_response res;
res.payload.len = sizeof(VolumeDiskData);
res.payload.buf = &vp->header->diskstuff;
- if (FSYNC_VolOp(volumeId,
+ if (FSYNC_VolOp(vp->hashid,
partp->name,
FSYNC_VOL_QUERY_HDR,
FSYNC_WHATEVER,
IncUInt64(&vp->stats.hdr_loads);
VOL_UNLOCK;
#endif /* AFS_DEMAND_ATTACH_FS */
-
+
if (*ec) {
- Log("VAttachVolume: Error reading diskDataHandle vol header %s; error=%u\n", path, *ec);
+ Log("VAttachVolume: Error reading diskDataHandle header for vol %lu; "
+ "error=%u\n", afs_printable_uint32_lu(volid), *ec);
+ goto done;
}
#ifdef AFS_DEMAND_ATTACH_FS
# ifdef FSSYNC_BUILD_CLIENT
disk_header_loaded:
-#endif
- if (!*ec) {
-
- /* check for pending volume operations */
- if (vp->pending_vol_op) {
- /* see if the pending volume op requires exclusive access */
- switch (vp->pending_vol_op->vol_op_state) {
- case FSSYNC_VolOpPending:
- /* this should never happen */
- assert(vp->pending_vol_op->vol_op_state != FSSYNC_VolOpPending);
- break;
-
- case FSSYNC_VolOpRunningUnknown:
- if (VVolOpLeaveOnline_r(vp, vp->pending_vol_op)) {
- vp->pending_vol_op->vol_op_state = FSSYNC_VolOpRunningOnline;
- break;
- } else {
- vp->pending_vol_op->vol_op_state = FSSYNC_VolOpRunningOffline;
- /* fall through to take volume offline */
- }
-
- case FSSYNC_VolOpRunningOffline:
- /* mark the volume down */
- *ec = VOFFLINE;
- VChangeState_r(vp, VOL_STATE_UNATTACHED);
- if (V_offlineMessage(vp)[0] == '\0')
- strlcpy(V_offlineMessage(vp),
- "A volume utility is running.",
- sizeof(V_offlineMessage(vp)));
- V_offlineMessage(vp)[sizeof(V_offlineMessage(vp)) - 1] = '\0';
-
- /* check to see if we should set the specialStatus flag */
- if (VVolOpSetVBusy_r(vp, vp->pending_vol_op)) {
- vp->specialStatus = VBUSY;
- }
- default:
- break;
- }
- }
+# endif /* FSSYNC_BUILD_CLIENT */
- V_attachFlags(vp) |= VOL_HDR_LOADED;
- vp->stats.last_hdr_load = vp->stats.last_attach;
+ /* if the lock type we actually used to lock the volume is different than
+ * the lock type we should have used, retry with the lock type we should
+ * use */
+ use_locktype = VVolLockType(mode, VolumeWriteable(vp));
+ if (locktype != use_locktype) {
+ retry = 1;
+ lock_tries++;
}
#endif /* AFS_DEMAND_ATTACH_FS */
- if (!*ec) {
- struct IndexFileHeader iHead;
+ *ec = 0;
-#if OPENAFS_VOL_STATS
- /*
- * We just read in the diskstuff part of the header. If the detailed
- * volume stats area has not yet been initialized, we should bzero the
- * area and mark it as initialized.
- */
- if (!(V_stat_initialized(vp))) {
- memset((V_stat_area(vp)), 0, VOL_STATS_BYTES);
- V_stat_initialized(vp) = 1;
- }
-#endif /* OPENAFS_VOL_STATS */
+ done:
+#if defined(AFS_DEMAND_ATTACH_FS) && defined(FSSYNC_BUILD_CLIENT)
+ if (!peek && *ec == 0 && retry == 0 && VMustCheckoutVolume(mode)) {
- (void)ReadHeader(ec, vp->vnodeIndex[vSmall].handle,
- (char *)&iHead, sizeof(iHead),
- SMALLINDEXMAGIC, SMALLINDEXVERSION);
+ code = FSYNC_VerifyCheckout(volid, partp->name, FSYNC_VOL_NEEDVOLUME, mode);
- if (*ec) {
- Log("VAttachVolume: Error reading smallVnode vol header %s; error=%u\n", path, *ec);
+ if (code == SYNC_DENIED) {
+ /* must retry checkout; fileserver no longer thinks we have
+ * the volume */
+ retry = 1;
+ checkout_tries++;
+ done_checkout = 0;
+
+ } else if (code != SYNC_OK) {
+ *ec = VNOVOL;
}
}
+#endif /* AFS_DEMAND_ATTACH_FS && FSSYNC_BUILD_CLIENT */
- if (!*ec) {
- struct IndexFileHeader iHead;
-
- (void)ReadHeader(ec, vp->vnodeIndex[vLarge].handle,
- (char *)&iHead, sizeof(iHead),
- LARGEINDEXMAGIC, LARGEINDEXVERSION);
+ if (*ec || retry) {
+ /* either we are going to be called again for a second pass, or we
+ * encountered an error; clean up in either case */
- if (*ec) {
- Log("VAttachVolume: Error reading largeVnode vol header %s; error=%u\n", path, *ec);
+#ifdef AFS_DEMAND_ATTACH_FS
+ if ((V_attachFlags(vp) & VOL_LOCKED)) {
+ VUnlockVolume(vp);
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+ if (vp->linkHandle) {
+ IH_RELEASE(vp->vnodeIndex[vLarge].handle);
+ IH_RELEASE(vp->vnodeIndex[vSmall].handle);
+ IH_RELEASE(vp->diskDataHandle);
+ IH_RELEASE(vp->linkHandle);
}
}
-#ifdef AFS_NAMEI_ENV
- if (!*ec) {
- struct versionStamp stamp;
+ if (*ec) {
+ return;
+ }
+ if (retry) {
+ first_try = 0;
+ goto retry;
+ }
+}
- (void)ReadHeader(ec, V_linkHandle(vp), (char *)&stamp,
- sizeof(stamp), LINKTABLEMAGIC, LINKTABLEVERSION);
+#ifdef AFS_DEMAND_ATTACH_FS
+static void
+attach_check_vop(Error *ec, VolumeId volid, struct DiskPartition64 *partp,
+ Volume *vp, int *acheckedOut)
+{
+ *ec = 0;
- if (*ec) {
- Log("VAttachVolume: Error reading namei vol header %s; error=%u\n", path, *ec);
- }
- }
-#endif /* AFS_NAMEI_ENV */
+ if (vp->pending_vol_op) {
-#if defined(AFS_DEMAND_ATTACH_FS)
- if (*ec && ((*ec != VOFFLINE) || (V_attachState(vp) != VOL_STATE_UNATTACHED))) {
- VOL_LOCK;
- if (!VCanScheduleSalvage()) {
- Log("VAttachVolume: Error attaching volume %s; volume needs salvage; error=%u\n", path, *ec);
- }
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
- vp->nUsers = 0;
+ VOL_LOCK;
- VCheckFree(vp);
- return NULL;
+ if (vp->pending_vol_op->vol_op_state == FSSYNC_VolOpRunningUnknown) {
+ int code;
+ code = VVolOpLeaveOnlineNoHeader_r(vp, vp->pending_vol_op);
+ if (code == 1) {
+ vp->pending_vol_op->vol_op_state = FSSYNC_VolOpRunningOnline;
+ } else if (code == 0) {
+ vp->pending_vol_op->vol_op_state = FSSYNC_VolOpRunningOffline;
+
+ } else {
+ /* we need the vol header to determine if the volume can be
+ * left online for the vop, so... get the header */
+
+ VOL_UNLOCK;
+
+ /* attach header with peek=1 to avoid checking out the volume
+ * or locking it; we just want the header info, we're not
+ * messing with the volume itself at all */
+ attach_volume_header(ec, vp, partp, V_PEEK, 1, acheckedOut);
+ if (*ec) {
+ return;
+ }
+
+ VOL_LOCK;
+
+ if (VVolOpLeaveOnline_r(vp, vp->pending_vol_op)) {
+ vp->pending_vol_op->vol_op_state = FSSYNC_VolOpRunningOnline;
+ } else {
+ vp->pending_vol_op->vol_op_state = FSSYNC_VolOpRunningOffline;
+ }
+
+ /* make sure we grab a new vol header and re-open stuff on
+ * actual attachment; we can't keep the data we grabbed, since
+ * it was not done under a lock and thus not safe */
+ FreeVolumeHeader(vp);
+ VReleaseVolumeHandles_r(vp);
+ }
+ }
+ /* see if the pending volume op requires exclusive access */
+ switch (vp->pending_vol_op->vol_op_state) {
+ case FSSYNC_VolOpPending:
+ /* this should never happen */
+ osi_Assert(vp->pending_vol_op->vol_op_state != FSSYNC_VolOpPending);
+ break;
+
+ case FSSYNC_VolOpRunningUnknown:
+ /* this should never happen; we resolved 'unknown' above */
+ osi_Assert(vp->pending_vol_op->vol_op_state != FSSYNC_VolOpRunningUnknown);
+ break;
+
+ case FSSYNC_VolOpRunningOffline:
+ /* mark the volume down */
+ *ec = VOFFLINE;
+ VChangeState_r(vp, VOL_STATE_UNATTACHED);
+
+ /* do not set V_offlineMessage here; we don't have ownership of
+ * the volume (and probably do not have the header loaded), so we
+ * can't alter the disk header */
+
+ /* check to see if we should set the specialStatus flag */
+ if (VVolOpSetVBusy_r(vp, vp->pending_vol_op)) {
+ /* don't overwrite specialStatus if it was already set to
+ * something else (e.g. VMOVED) */
+ if (!vp->specialStatus) {
+ vp->specialStatus = VBUSY;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ VOL_UNLOCK;
+ }
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+/**
+ * volume attachment helper function.
+ *
+ * @param[out] ec error code
+ * @param[in] volumeId volume ID of the attaching volume
+ * @param[in] path full path to the volume header .vol file
+ * @param[in] partp disk partition object for the attaching partition
+ * @param[in] vp volume object; vp->hashid, vp->device, vp->partition,
+ * vp->vnode_list, vp->rx_call_list, and V_attachCV (for
+ * DAFS) should already be initialized
+ * @param[in] isbusy 1 if vp->specialStatus should be set to VBUSY; that is,
+ * if there is a volume operation running for this volume
+ * that should set the volume to VBUSY during its run. 0
+ * otherwise. (see VVolOpSetVBusy_r)
+ * @param[in] mode attachment mode such as V_VOLUPD, V_DUMP, etc (see
+ * volume.h)
+ * @param[out] acheckedOut If we successfully checked-out the volume from
+ * the fileserver (if we needed to), this is set
+ * to 1, otherwise it is 0.
+ *
+ * @return pointer to the semi-attached volume pointer
+ * @retval NULL an error occurred (check value of *ec)
+ * @retval vp volume successfully attaching
+ *
+ * @pre no locks held
+ *
+ * @post VOL_LOCK held
+ */
+static Volume *
+attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp,
+ Volume * vp, int isbusy, int mode, int *acheckedOut)
+{
+ /* have we read in the header successfully? */
+ int read_header = 0;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* should we FreeVolume(vp) instead of VCheckFree(vp) in the error
+ * cleanup? */
+ int forcefree = 0;
+
+ /* in the case of an error, to what state should the volume be
+ * transitioned? */
+ VolState error_state = VOL_STATE_ERROR;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ *ec = 0;
+
+ vp->vnodeIndex[vLarge].handle = NULL;
+ vp->vnodeIndex[vSmall].handle = NULL;
+ vp->diskDataHandle = NULL;
+ vp->linkHandle = NULL;
+
+ *acheckedOut = 0;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ attach_check_vop(ec, volumeId, partp, vp, acheckedOut);
+ if (!*ec) {
+ attach_volume_header(ec, vp, partp, mode, 0, acheckedOut);
+ }
+#else
+ attach_volume_header(ec, vp, partp, mode, 0, acheckedOut);
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
+ if (*ec == VNOVOL) {
+ /* if the volume doesn't exist, skip straight to 'error' so we don't
+ * request a salvage */
+ goto unlocked_error;
+ }
+
+ if (!*ec) {
+ read_header = 1;
+
+ /* ensure that we don't override specialStatus if it was set to
+ * something else (e.g. VMOVED) */
+ if (isbusy && !vp->specialStatus) {
+ vp->specialStatus = VBUSY;
+ }
+ vp->shuttingDown = 0;
+ vp->goingOffline = 0;
+ vp->nUsers = 1;
+#ifdef AFS_DEMAND_ATTACH_FS
+ vp->stats.last_attach = FT_ApproxTime();
+ vp->stats.attaches++;
+#endif
+
+ VOL_LOCK;
+ IncUInt64(&VStats.attaches);
+ vp->cacheCheck = ++VolumeCacheCheck;
+ /* just in case this ever rolls over */
+ if (!vp->cacheCheck)
+ vp->cacheCheck = ++VolumeCacheCheck;
+ VOL_UNLOCK;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ V_attachFlags(vp) |= VOL_HDR_LOADED;
+ vp->stats.last_hdr_load = vp->stats.last_attach;
+#endif /* AFS_DEMAND_ATTACH_FS */
+ }
+
+ if (!*ec) {
+ struct IndexFileHeader iHead;
+
+#if OPENAFS_VOL_STATS
+ /*
+ * We just read in the diskstuff part of the header. If the detailed
+ * volume stats area has not yet been initialized, we should bzero the
+ * area and mark it as initialized.
+ */
+ if (!(V_stat_initialized(vp))) {
+ memset((V_stat_area(vp)), 0, VOL_STATS_BYTES);
+ V_stat_initialized(vp) = 1;
+ }
+#endif /* OPENAFS_VOL_STATS */
+
+ (void)ReadHeader(ec, vp->vnodeIndex[vSmall].handle,
+ (char *)&iHead, sizeof(iHead),
+ SMALLINDEXMAGIC, SMALLINDEXVERSION);
+
+ if (*ec) {
+ Log("VAttachVolume: Error reading smallVnode vol header %s; error=%u\n", path, *ec);
+ }
+ }
+
+ if (!*ec) {
+ struct IndexFileHeader iHead;
+
+ (void)ReadHeader(ec, vp->vnodeIndex[vLarge].handle,
+ (char *)&iHead, sizeof(iHead),
+ LARGEINDEXMAGIC, LARGEINDEXVERSION);
+
+ if (*ec) {
+ Log("VAttachVolume: Error reading largeVnode vol header %s; error=%u\n", path, *ec);
+ }
+ }
+
+#ifdef AFS_NAMEI_ENV
+ if (!*ec) {
+ struct versionStamp stamp;
+
+ (void)ReadHeader(ec, V_linkHandle(vp), (char *)&stamp,
+ sizeof(stamp), LINKTABLEMAGIC, LINKTABLEVERSION);
+
+ if (*ec) {
+ Log("VAttachVolume: Error reading namei vol header %s; error=%u\n", path, *ec);
+ }
+ }
+#endif /* AFS_NAMEI_ENV */
+
+#if defined(AFS_DEMAND_ATTACH_FS)
+ if (*ec && ((*ec != VOFFLINE) || (V_attachState(vp) != VOL_STATE_UNATTACHED))) {
+ VOL_LOCK;
+ if (!VCanScheduleSalvage()) {
+ Log("VAttachVolume: Error attaching volume %s; volume needs salvage; error=%u\n", path, *ec);
+ }
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
+ VOL_SALVAGE_NO_OFFLINE);
+ vp->nUsers = 0;
+
+ goto locked_error;
} else if (*ec) {
/* volume operation in progress */
- VOL_LOCK;
- VCheckFree(vp);
- return NULL;
+ goto unlocked_error;
}
#else /* AFS_DEMAND_ATTACH_FS */
if (*ec) {
Log("VAttachVolume: Error attaching volume %s; volume needs salvage; error=%u\n", path, *ec);
- VOL_LOCK;
- FreeVolume(vp);
- return NULL;
+ goto unlocked_error;
}
#endif /* AFS_DEMAND_ATTACH_FS */
if (!VCanScheduleSalvage()) {
Log("VAttachVolume: volume salvage flag is ON for %s; volume needs salvage\n", path);
}
- VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
+ VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER |
+ VOL_SALVAGE_NO_OFFLINE);
vp->nUsers = 0;
- VCheckFree(vp);
#else /* AFS_DEMAND_ATTACH_FS */
- FreeVolume(vp);
*ec = VSALVAGE;
#endif /* AFS_DEMAND_ATTACH_FS */
- return NULL;
+
+ goto locked_error;
}
VOL_LOCK;
- if (VShouldCheckInUse(mode)) {
-#ifndef FAST_RESTART
- if (V_inUse(vp) && VolumeWriteable(vp)) {
- if (!V_needsSalvaged(vp)) {
- V_needsSalvaged(vp) = 1;
- VUpdateVolume_r(ec, vp, 0);
- }
+ vp->nextVnodeUnique = V_uniquifier(vp);
+
+ if (VShouldCheckInUse(mode) && V_inUse(vp) && VolumeWriteable(vp)) {
+ if (!V_needsSalvaged(vp)) {
+ V_needsSalvaged(vp) = 1;
+ VUpdateVolume_r(ec, vp, 0);
+ }
#if defined(AFS_DEMAND_ATTACH_FS)
- if (!VCanScheduleSalvage()) {
- Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path);
- }
- VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
- vp->nUsers = 0;
+ if (!VCanScheduleSalvage()) {
+ Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path);
+ }
+ VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER |
+ VOL_SALVAGE_NO_OFFLINE);
+ vp->nUsers = 0;
- VCheckFree(vp);
#else /* AFS_DEMAND_ATTACH_FS */
- Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path);
- FreeVolume(vp);
- *ec = VSALVAGE;
+ Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path);
+ *ec = VSALVAGE;
#endif /* AFS_DEMAND_ATTACH_FS */
- return NULL;
- }
-#endif /* FAST_RESTART */
- if (programType == fileServer && V_destroyMe(vp) == DESTROY_ME) {
- /* Only check destroyMe if we are the fileserver, since the
- * volserver et al sometimes need to work with volumes with
- * destroyMe set. Examples are 'temporary' volumes the
- * volserver creates, and when we create a volume (destroyMe
- * is set on creation; sometimes a separate volserver
- * transaction is created to clear destroyMe).
- */
+ goto locked_error;
+ }
+
+ if (programType == fileServer && V_destroyMe(vp) == DESTROY_ME) {
+ /* Only check destroyMe if we are the fileserver, since the
+ * volserver et al sometimes need to work with volumes with
+ * destroyMe set. Examples are 'temporary' volumes the
+ * volserver creates, and when we create a volume (destroyMe
+ * is set on creation; sometimes a separate volserver
+ * transaction is created to clear destroyMe).
+ */
#if defined(AFS_DEMAND_ATTACH_FS)
- /* schedule a salvage so the volume goes away on disk */
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
- VChangeState_r(vp, VOL_STATE_ERROR);
- vp->nUsers = 0;
+ /* schedule a salvage so the volume goes away on disk */
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
+ VOL_SALVAGE_NO_OFFLINE);
+ VChangeState_r(vp, VOL_STATE_ERROR);
+ vp->nUsers = 0;
+ forcefree = 1;
#endif /* AFS_DEMAND_ATTACH_FS */
- FreeVolume(vp);
- Log("VAttachVolume: volume %s is junk; it should be destroyed at next salvage\n", path);
- *ec = VNOVOL;
- return NULL;
- }
+ Log("VAttachVolume: volume %s is junk; it should be destroyed at next salvage\n", path);
+ *ec = VNOVOL;
+ goto locked_error;
}
- vp->nextVnodeUnique = V_uniquifier(vp);
vp->vnodeIndex[vSmall].bitmap = vp->vnodeIndex[vLarge].bitmap = NULL;
#ifndef BITMAP_LATER
if (programType == fileServer && VolumeWriteable(vp)) {
VGetBitmap_r(ec, vp, i);
if (*ec) {
#ifdef AFS_DEMAND_ATTACH_FS
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
+ VOL_SALVAGE_NO_OFFLINE);
vp->nUsers = 0;
- VCheckFree(vp);
-#else /* AFS_DEMAND_ATTACH_FS */
- FreeVolume(vp);
#endif /* AFS_DEMAND_ATTACH_FS */
Log("VAttachVolume: error getting bitmap for volume (%s)\n",
path);
- return NULL;
+ goto locked_error;
}
}
}
#endif /* BITMAP_LATER */
+ if (VInit >= 2 && V_needsCallback(vp)) {
+ if (V_BreakVolumeCallbacks) {
+ Log("VAttachVolume: Volume %lu was changed externally; breaking callbacks\n",
+ afs_printable_uint32_lu(V_id(vp)));
+ V_needsCallback(vp) = 0;
+ VOL_UNLOCK;
+ (*V_BreakVolumeCallbacks) (V_id(vp));
+ VOL_LOCK;
+
+ VUpdateVolume_r(ec, vp, 0);
+ }
+#ifdef FSSYNC_BUILD_CLIENT
+ else if (VCanUseFSSYNC()) {
+ afs_int32 fsync_code;
+
+ V_needsCallback(vp) = 0;
+ VOL_UNLOCK;
+ fsync_code = FSYNC_VolOp(V_id(vp), NULL, FSYNC_VOL_BREAKCBKS, FSYNC_WHATEVER, NULL);
+ VOL_LOCK;
+
+ if (fsync_code) {
+ V_needsCallback(vp) = 1;
+ Log("Error trying to tell the fileserver to break callbacks for "
+ "changed volume %lu; error code %ld\n",
+ afs_printable_uint32_lu(V_id(vp)),
+ afs_printable_int32_ld(fsync_code));
+ } else {
+ VUpdateVolume_r(ec, vp, 0);
+ }
+ }
+#endif /* FSSYNC_BUILD_CLIENT */
+
+ if (*ec) {
+ Log("VAttachVolume: error %d clearing needsCallback on volume "
+ "%lu; needs salvage\n", (int)*ec,
+ afs_printable_uint32_lu(V_id(vp)));
+#ifdef AFS_DEMAND_ATTACH_FS
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
+ VOL_SALVAGE_NO_OFFLINE);
+ vp->nUsers = 0;
+#else /* !AFS_DEMAND_ATTACH_FS */
+ *ec = VSALVAGE;
+#endif /* !AFS_DEMAND_ATTACH_FS */
+ goto locked_error;
+ }
+ }
+
if (programType == fileServer) {
if (vp->specialStatus)
vp->specialStatus = 0;
V_inUse(vp) = fileServer;
V_offlineMessage(vp)[0] = '\0';
}
+ if (!V_inUse(vp)) {
+ *ec = VNOVOL;
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* Put the vol into PREATTACHED state, so if someone tries to
+ * access it again, we try to attach, see that we're not blessed,
+ * and give a VNOVOL error again. Putting it into UNATTACHED state
+ * would result in a VOFFLINE error instead. */
+ error_state = VOL_STATE_PREATTACHED;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ /* mimic e.g. GetVolume errors */
+ if (!V_blessed(vp)) {
+ Log("Volume %lu offline: not blessed\n", afs_printable_uint32_lu(V_id(vp)));
+ FreeVolumeHeader(vp);
+ } else if (!V_inService(vp)) {
+ Log("Volume %lu offline: not in service\n", afs_printable_uint32_lu(V_id(vp)));
+ FreeVolumeHeader(vp);
+ } else {
+ Log("Volume %lu offline: needs salvage\n", afs_printable_uint32_lu(V_id(vp)));
+ *ec = VSALVAGE;
+#ifdef AFS_DEMAND_ATTACH_FS
+ error_state = VOL_STATE_ERROR;
+ /* see if we can recover */
+ VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
+#endif
+ }
+#ifdef AFS_DEMAND_ATTACH_FS
+ vp->nUsers = 0;
+#endif
+ goto locked_error;
+ }
} else {
+#ifdef AFS_DEMAND_ATTACH_FS
if ((mode != V_PEEK) && (mode != V_SECRETLY))
V_inUse(vp) = programType;
+#endif /* AFS_DEMAND_ATTACH_FS */
V_checkoutMode(vp) = mode;
}
AddVolumeToHashTable(vp, V_id(vp));
#ifdef AFS_DEMAND_ATTACH_FS
+ if (VCanUnlockAttached() && (V_attachFlags(vp) & VOL_LOCKED)) {
+ VUnlockVolume(vp);
+ }
if ((programType != fileServer) ||
(V_inUse(vp) == fileServer)) {
AddVolumeToVByPList_r(vp);
VChangeState_r(vp, VOL_STATE_UNATTACHED);
}
#endif
+
return vp;
+
+unlocked_error:
+ VOL_LOCK;
+locked_error:
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (!VIsErrorState(V_attachState(vp))) {
+ VChangeState_r(vp, error_state);
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ if (read_header) {
+ VReleaseVolumeHandles_r(vp);
+ }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VCheckSalvage(vp);
+ if (forcefree) {
+ FreeVolume(vp);
+ } else {
+ VCheckFree(vp);
+ }
+#else /* !AFS_DEMAND_ATTACH_FS */
+ FreeVolume(vp);
+#endif /* !AFS_DEMAND_ATTACH_FS */
+ return NULL;
}
/* Attach an existing volume.
char *part, *name;
VGetVolumePath(ec, volumeId, &part, &name);
if (*ec) {
- register Volume *vp;
+ Volume *vp;
Error error;
vp = VGetVolume_r(&error, volumeId);
if (vp) {
- assert(V_inUse(vp) == 0);
+ osi_Assert(V_inUse(vp) == 0);
VDetachVolume_r(ec, vp);
}
return NULL;
* is dropped within VHold */
#ifdef AFS_DEMAND_ATTACH_FS
static int
-VHold_r(register Volume * vp)
+VHold_r(Volume * vp)
{
Error error;
}
#else /* AFS_DEMAND_ATTACH_FS */
static int
-VHold_r(register Volume * vp)
+VHold_r(Volume * vp)
{
Error error;
vp->nUsers++;
return 0;
}
-#endif /* AFS_DEMAND_ATTACH_FS */
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+/**** volume timeout-related stuff ****/
+
+#ifdef AFS_PTHREAD_ENV
+
+static struct timespec *shutdown_timeout;
+static pthread_once_t shutdown_timeout_once = PTHREAD_ONCE_INIT;
+
+static_inline int
+VTimedOut(const struct timespec *ts)
+{
+ struct timeval tv;
+ int code;
+
+ if (ts->tv_sec == 0) {
+ /* short-circuit; this will have always timed out */
+ return 1;
+ }
+
+ code = gettimeofday(&tv, NULL);
+ if (code) {
+ Log("Error %d from gettimeofday, assuming we have not timed out\n", errno);
+ /* assume no timeout; failure mode is we just wait longer than normal
+ * instead of returning errors when we shouldn't */
+ return 0;
+ }
+
+ if (tv.tv_sec < ts->tv_sec ||
+ (tv.tv_sec == ts->tv_sec && tv.tv_usec*1000 < ts->tv_nsec)) {
+
+ return 0;
+ }
+
+ return 1;
+}
+
+/**
+ * Calculate an absolute timeout.
+ *
+ * @param[out] ts A timeout that is "timeout" seconds from now; if we return
+ * NULL, the memory is not touched
+ * @param[in] timeout How long the timeout should be from now
+ *
+ * @return timeout to use
+ * @retval NULL no timeout; wait forever
+ * @retval non-NULL the given value for "ts"
+ *
+ * @internal
+ */
+static struct timespec *
+VCalcTimeout(struct timespec *ts, afs_int32 timeout)
+{
+ struct timeval now;
+ int code;
+
+ if (timeout < 0) {
+ return NULL;
+ }
+
+ if (timeout == 0) {
+ ts->tv_sec = ts->tv_nsec = 0;
+ return ts;
+ }
+
+ code = gettimeofday(&now, NULL);
+ if (code) {
+ Log("Error %d from gettimeofday, falling back to 'forever' timeout\n", errno);
+ return NULL;
+ }
+
+ ts->tv_sec = now.tv_sec + timeout;
+ ts->tv_nsec = now.tv_usec * 1000;
+
+ return ts;
+}
+
+/**
+ * Initialize the shutdown_timeout global.
+ */
+static void
+VShutdownTimeoutInit(void)
+{
+ struct timespec *ts;
+
+ ts = malloc(sizeof(*ts));
+
+ shutdown_timeout = VCalcTimeout(ts, vol_opts.offline_shutdown_timeout);
+
+ if (!shutdown_timeout) {
+ free(ts);
+ }
+}
+
+/**
+ * Figure out the timeout that should be used for waiting for offline volumes.
+ *
+ * @param[out] ats Storage space for a local timeout value if needed
+ *
+ * @return The timeout value that should be used
+ * @retval NULL No timeout; wait forever for offlining volumes
+ * @retval non-NULL A pointer to the absolute time that should be used as
+ * the deadline for waiting for offlining volumes.
+ *
+ * @note If we return non-NULL, the pointer we return may or may not be the
+ * same as "ats"
+ */
+static const struct timespec *
+VOfflineTimeout(struct timespec *ats)
+{
+ if (vol_shutting_down) {
+ osi_Assert(pthread_once(&shutdown_timeout_once, VShutdownTimeoutInit) == 0);
+ return shutdown_timeout;
+ } else {
+ return VCalcTimeout(ats, vol_opts.offline_timeout);
+ }
+}
+
+#else /* AFS_PTHREAD_ENV */
+
+/* Waiting a certain amount of time for offlining volumes is not supported
+ * for LWP due to a lack of primitives. So, we never time out */
+# define VTimedOut(x) (0)
+# define VOfflineTimeout(x) (NULL)
+
+#endif /* !AFS_PTHREAD_ENV */
#if 0
static int
-VHold(register Volume * vp)
+VHold(Volume * vp)
{
int retVal;
VOL_LOCK;
}
#endif
+static afs_int32
+VIsGoingOffline_r(struct Volume *vp)
+{
+ afs_int32 code = 0;
+
+ if (vp->goingOffline) {
+ if (vp->specialStatus) {
+ code = vp->specialStatus;
+ } else if (V_inService(vp) == 0 || V_blessed(vp) == 0) {
+ code = VNOVOL;
+ } else {
+ code = VOFFLINE;
+ }
+ }
+
+ return code;
+}
+
+/**
+ * Tell the caller if a volume is waiting to go offline.
+ *
+ * @param[in] vp The volume we want to know about
+ *
+ * @return volume status
+ * @retval 0 volume is not waiting to go offline, go ahead and use it
+ * @retval nonzero volume is waiting to offline, and give the returned code
+ * as an error to anyone accessing the volume
+ *
+ * @pre VOL_LOCK is NOT held
+ * @pre caller holds a heavyweight reference on vp
+ */
+afs_int32
+VIsGoingOffline(struct Volume *vp)
+{
+ afs_int32 code;
+
+ VOL_LOCK;
+ code = VIsGoingOffline_r(vp);
+ VOL_UNLOCK;
+
+ return code;
+}
+
+/**
+ * Register an RX call with a volume.
+ *
+ * @param[inout] ec Error code; if unset when passed in, may be set if
+ * the volume starts going offline
+ * @param[out] client_ec @see GetVolume
+ * @param[in] vp Volume struct
+ * @param[in] cbv VCallByVol struct containing the RX call to register
+ *
+ * @pre VOL_LOCK held
+ * @pre caller holds heavy ref on vp
+ *
+ * @internal
+ */
+static void
+VRegisterCall_r(Error *ec, Error *client_ec, Volume *vp, struct VCallByVol *cbv)
+{
+ if (vp && cbv) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (!*ec) {
+ /* just in case the volume started going offline after we got the
+ * reference to it... otherwise, if the volume started going
+ * offline right at the end of GetVolume(), we might race with the
+ * RX call scanner, and return success and add our cbv to the
+ * rx_call_list _after_ the scanner has scanned the list. */
+ *ec = VIsGoingOffline_r(vp);
+ if (client_ec) {
+ *client_ec = *ec;
+ }
+ }
+
+ while (V_attachState(vp) == VOL_STATE_SCANNING_RXCALLS) {
+ VWaitStateChange_r(vp);
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ queue_Prepend(&vp->rx_call_list, cbv);
+ }
+}
+
+/**
+ * Deregister an RX call with a volume.
+ *
+ * @param[in] vp Volume struct
+ * @param[in] cbv VCallByVol struct containing the RX call to deregister
+ *
+ * @pre VOL_LOCK held
+ * @pre caller holds heavy ref on vp
+ *
+ * @internal
+ */
+static void
+VDeregisterCall_r(Volume *vp, struct VCallByVol *cbv)
+{
+ if (cbv && queue_IsOnQueue(cbv)) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ while (V_attachState(vp) == VOL_STATE_SCANNING_RXCALLS) {
+ VWaitStateChange_r(vp);
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ queue_Remove(cbv);
+ }
+}
/***************************************************/
/* get and put volume routines */
* @internal volume package internal use only
*/
void
-VPutVolume_r(register Volume * vp)
+VPutVolume_r(Volume * vp)
{
- assert(--vp->nUsers >= 0);
+ osi_Assert(--vp->nUsers >= 0);
if (vp->nUsers == 0) {
VCheckOffline(vp);
ReleaseVolumeHeader(vp->header);
}
void
-VPutVolume(register Volume * vp)
+VPutVolume(Volume * vp)
{
VOL_LOCK;
VPutVolume_r(vp);
VOL_UNLOCK;
}
+/**
+ * Puts a volume reference obtained with VGetVolumeWithCall.
+ *
+ * @param[in] vp Volume struct
+ * @param[in] cbv VCallByVol struct given to VGetVolumeWithCall, or NULL if none
+ *
+ * @pre VOL_LOCK is NOT held
+ */
+void
+VPutVolumeWithCall(Volume *vp, struct VCallByVol *cbv)
+{
+ VOL_LOCK;
+ VDeregisterCall_r(vp, cbv);
+ VPutVolume_r(vp);
+ VOL_UNLOCK;
+}
/* Get a pointer to an attached volume. The pointer is returned regardless
of whether or not the volume is in service or on/off line. An error
return retVal;
}
+/**
+ * Get a volume reference associated with an RX call.
+ *
+ * @param[out] ec @see GetVolume
+ * @param[out] client_ec @see GetVolume
+ * @param[in] volumeId @see GetVolume
+ * @param[in] ts How long to wait for going-offline volumes (absolute time).
+ * If NULL, wait forever. If ts->tv_sec == 0, return immediately
+ * with an error if the volume is going offline.
+ * @param[in] cbv Contains an RX call to be associated with this volume
+ * reference. This call may be interrupted if the volume is
+ * requested to go offline while we hold a ref on it. Give NULL
+ * to not associate an RX call with this reference.
+ *
+ * @return @see GetVolume
+ *
+ * @note for LWP builds, ts must be NULL
+ *
+ * @note A reference obtained with this function MUST be put back with
+ * VPutVolumeWithCall
+ */
+Volume *
+VGetVolumeWithCall(Error * ec, Error * client_ec, VolId volumeId,
+ const struct timespec *ts, struct VCallByVol *cbv)
+{
+ Volume *retVal;
+ VOL_LOCK;
+ retVal = GetVolume(ec, client_ec, volumeId, NULL, ts);
+ VRegisterCall_r(ec, client_ec, retVal, cbv);
+ VOL_UNLOCK;
+ return retVal;
+}
+
Volume *
VGetVolume_r(Error * ec, VolId volumeId)
{
- return GetVolume(ec, NULL, volumeId, NULL, 0);
+ return GetVolume(ec, NULL, volumeId, NULL, NULL);
}
/* try to get a volume we've previously looked up */
/* for demand attach fs, caller MUST NOT hold a ref count on vp */
-Volume *
+Volume *
VGetVolumeByVp_r(Error * ec, Volume * vp)
{
- return GetVolume(ec, NULL, vp->hashid, vp, 0);
+ return GetVolume(ec, NULL, vp->hashid, vp, NULL);
}
-/* private interface for getting a volume handle
- * volumeId must be provided.
- * hint is an optional parameter to speed up hash lookups
- * flags is not used at this time
+/**
+ * private interface for getting a volume handle
+ *
+ * @param[out] ec error code (0 if no error)
+ * @param[out] client_ec wire error code to be given to clients
+ * @param[in] volumeId ID of the volume we want
+ * @param[in] hint optional hint for hash lookups, or NULL
+ * @param[in] timeout absolute deadline for waiting for the volume to go
+ * offline, if it is going offline. NULL to wait forever.
+ *
+ * @return a volume handle for the specified volume
+ * @retval NULL an error occurred, or the volume is in such a state that
+ * we cannot load a header or return any volume struct
+ *
+ * @note for DAFS, caller must NOT hold a ref count on 'hint'
+ *
+ * @note 'timeout' is only checked if the volume is actually going offline; so
+ * if you pass timeout->tv_sec = 0, this will exhibit typical
+ * nonblocking behavior.
+ *
+ * @note for LWP builds, 'timeout' must be NULL
*/
-/* for demand attach fs, caller MUST NOT hold a ref count on hint */
static Volume *
-GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int flags)
+GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint,
+ const struct timespec *timeout)
{
Volume *vp = hint;
/* pull this profiling/debugging code out of regular builds */
Volume *avp, * rvp = hint;
#endif
- /*
+ /*
* if VInit is zero, the volume package dynamic
* data structures have not been initialized yet,
* and we must immediately return an error
VGET_CTR_INC(V3);
IncUInt64(&VStats.hdr_gets);
-
+
#ifdef AFS_DEMAND_ATTACH_FS
/* block if someone else is performing an exclusive op on this volume */
if (rvp != vp) {
}
/*
- * short circuit with VOFFLINE in the following circumstances:
- *
- * - VOL_STATE_UNATTACHED
+ * short circuit with VOFFLINE for VOL_STATE_UNATTACHED and
+ * VNOVOL for VOL_STATE_DELETED
*/
- if (V_attachState(vp) == VOL_STATE_UNATTACHED) {
+ if ((V_attachState(vp) == VOL_STATE_UNATTACHED) ||
+ (V_attachState(vp) == VOL_STATE_DELETED)) {
if (vp->specialStatus) {
*ec = vp->specialStatus;
+ } else if (V_attachState(vp) == VOL_STATE_DELETED) {
+ *ec = VNOVOL;
} else {
*ec = VOFFLINE;
}
* - PREATTACHED
* - ATTACHED
* - SALVAGING
+ * - SALVAGE_REQ
*/
if (vp->salvage.requested) {
}
if (V_attachState(vp) == VOL_STATE_PREATTACHED) {
+ if (vp->specialStatus) {
+ *ec = vp->specialStatus;
+ vp = NULL;
+ break;
+ }
avp = VAttachVolumeByVp_r(ec, vp, 0);
if (avp) {
if (vp != avp) {
}
}
- if ((V_attachState(vp) == VOL_STATE_SALVAGING) ||
- (*ec == VSALVAGING)) {
+ if (VIsSalvaging(vp) || (*ec == VSALVAGING)) {
if (client_ec) {
/* see CheckVnode() in afsfileprocs.c for an explanation
* of this error code logic */
}
#endif
- LoadVolumeHeader(ec, vp);
- if (*ec) {
- VGET_CTR_INC(V6);
- /* Only log the error if it was a totally unexpected error. Simply
- * a missing inode is likely to be caused by the volume being deleted */
- if (errno != ENXIO || LogLevel)
- Log("Volume %u: couldn't reread volume header\n",
- vp->hashid);
-#ifdef AFS_DEMAND_ATTACH_FS
- if (VCanScheduleSalvage()) {
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
- } else {
- FreeVolume(vp);
- vp = NULL;
- }
-#else /* AFS_DEMAND_ATTACH_FS */
- FreeVolume(vp);
- vp = NULL;
-#endif /* AFS_DEMAND_ATTACH_FS */
- break;
- }
-
#ifdef AFS_DEMAND_ATTACH_FS
/*
- * this test MUST happen after the volume header is loaded
+ * this test MUST happen after VAttachVolumeByVp_r, so vol_op_state is
+ * not VolOpRunningUnknown (attach2 would have converted it to Online
+ * or Offline)
*/
-
+
/* only valid before/during demand attachment */
- assert(!vp->pending_vol_op || vp->pending_vol_op->vol_op_state != FSSYNC_VolOpRunningUnknown);
-
+ osi_Assert(!vp->pending_vol_op || vp->pending_vol_op->vol_op_state != FSSYNC_VolOpRunningUnknown);
+
/* deny getvolume due to running mutually exclusive vol op */
if (vp->pending_vol_op && vp->pending_vol_op->vol_op_state==FSSYNC_VolOpRunningOffline) {
- /*
+ /*
* volume cannot remain online during this volume operation.
- * notify client.
+ * notify client.
*/
if (vp->specialStatus) {
/*
break;
}
#endif /* AFS_DEMAND_ATTACH_FS */
-
+
+ LoadVolumeHeader(ec, vp);
+ if (*ec) {
+ VGET_CTR_INC(V6);
+ /* Only log the error if it was a totally unexpected error. Simply
+ * a missing inode is likely to be caused by the volume being deleted */
+ if (errno != ENXIO || LogLevel)
+ Log("Volume %u: couldn't reread volume header\n",
+ vp->hashid);
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (VCanScheduleSalvage()) {
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ } else {
+ FreeVolume(vp);
+ vp = NULL;
+ }
+#else /* AFS_DEMAND_ATTACH_FS */
+ FreeVolume(vp);
+ vp = NULL;
+#endif /* AFS_DEMAND_ATTACH_FS */
+ break;
+ }
+
VGET_CTR_INC(V7);
if (vp->shuttingDown) {
VGET_CTR_INC(V8);
if (programType == fileServer) {
VGET_CTR_INC(V9);
if (vp->goingOffline) {
- VGET_CTR_INC(V10);
+ if (timeout && VTimedOut(timeout)) {
+ /* we've timed out; don't wait for the vol */
+ } else {
+ VGET_CTR_INC(V10);
#ifdef AFS_DEMAND_ATTACH_FS
- /* wait for the volume to go offline */
- if (V_attachState(vp) == VOL_STATE_GOING_OFFLINE) {
- VWaitStateChange_r(vp);
- }
+ /* wait for the volume to go offline */
+ if (V_attachState(vp) == VOL_STATE_GOING_OFFLINE) {
+ VTimedWaitStateChange_r(vp, timeout, NULL);
+ }
#elif defined(AFS_PTHREAD_ENV)
- VOL_CV_WAIT(&vol_put_volume_cond);
+ VOL_CV_TIMEDWAIT(&vol_put_volume_cond, timeout, NULL);
#else /* AFS_PTHREAD_ENV */
- LWP_WaitProcess(VPutVolume);
+ /* LWP has no timed wait, so the caller better not be
+ * expecting one */
+ osi_Assert(!timeout);
+ LWP_WaitProcess(VPutVolume);
#endif /* AFS_PTHREAD_ENV */
- continue;
+ continue;
+ }
}
if (vp->specialStatus) {
VGET_CTR_INC(V11);
} else if (V_inService(vp) == 0 || V_blessed(vp) == 0) {
VGET_CTR_INC(V12);
*ec = VNOVOL;
- } else if (V_inUse(vp) == 0) {
+ } else if (V_inUse(vp) == 0 || vp->goingOffline) {
VGET_CTR_INC(V13);
*ec = VOFFLINE;
} else {
#endif /* AFS_DEMAND_ATTACH_FS */
not_inited:
- assert(vp || *ec);
+ osi_Assert(vp || *ec);
return vp;
}
/* caller MUST hold a heavyweight ref on vp */
#ifdef AFS_DEMAND_ATTACH_FS
void
-VTakeOffline_r(register Volume * vp)
+VTakeOffline_r(Volume * vp)
{
Error error;
- assert(vp->nUsers > 0);
- assert(programType == fileServer);
+ osi_Assert(vp->nUsers > 0);
+ osi_Assert(programType == fileServer);
VCreateReservation_r(vp);
VWaitExclusiveState_r(vp);
}
#else /* AFS_DEMAND_ATTACH_FS */
void
-VTakeOffline_r(register Volume * vp)
+VTakeOffline_r(Volume * vp)
{
- assert(vp->nUsers > 0);
- assert(programType == fileServer);
+ osi_Assert(vp->nUsers > 0);
+ osi_Assert(programType == fileServer);
vp->goingOffline = 1;
V_needsSalvaged(vp) = 1;
#endif /* AFS_DEMAND_ATTACH_FS */
void
-VTakeOffline(register Volume * vp)
+VTakeOffline(Volume * vp)
{
VOL_LOCK;
VTakeOffline_r(vp);
*
* @post needsSalvaged flag is set.
* for DAFS, salvage is requested.
- * no further references to the volume through the volume
+ * no further references to the volume through the volume
* package will be honored.
* all file descriptor and vnode caches are invalidated.
*
* @warning this is a heavy-handed interface. it results in
- * a volume going offline regardless of the current
+ * a volume going offline regardless of the current
* reference count state.
*
* @internal volume package internal use only
#endif /* AFS_DEMAND_ATTACH_FS */
#ifdef AFS_PTHREAD_ENV
- assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
+ CV_BROADCAST(&vol_put_volume_cond);
#else /* AFS_PTHREAD_ENV */
LWP_NoYieldSignal(VPutVolume);
#endif /* AFS_PTHREAD_ENV */
VOL_UNLOCK;
}
+/**
+ * Iterate over the RX calls associated with a volume, and interrupt them.
+ *
+ * @param[in] vp The volume whose RX calls we want to scan
+ *
+ * @pre VOL_LOCK held
+ */
+static void
+VScanCalls_r(struct Volume *vp)
+{
+ struct VCallByVol *cbv, *ncbv;
+ afs_int32 err;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VolState state_save;
+#endif
+
+ if (queue_IsEmpty(&vp->rx_call_list))
+ return; /* no calls to interrupt */
+ if (!vol_opts.interrupt_rxcall)
+ return; /* we have no function with which to interrupt calls */
+ err = VIsGoingOffline_r(vp);
+ if (!err)
+ return; /* we're not going offline anymore */
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VWaitExclusiveState_r(vp);
+ state_save = VChangeState_r(vp, VOL_STATE_SCANNING_RXCALLS);
+ VOL_UNLOCK;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ for(queue_Scan(&vp->rx_call_list, cbv, ncbv, VCallByVol)) {
+ if (LogLevel > 0) {
+ struct rx_peer *peer;
+ char hoststr[16];
+ peer = rx_PeerOf(rx_ConnectionOf(cbv->call));
+
+ Log("Offlining volume %lu while client %s:%u is trying to read "
+ "from it; kicking client off with error %ld\n",
+ (long unsigned) vp->hashid,
+ afs_inet_ntoa_r(rx_HostOf(peer), hoststr),
+ (unsigned) ntohs(rx_PortOf(peer)),
+ (long) err);
+ }
+ (*vol_opts.interrupt_rxcall) (cbv->call, err);
+ }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+ VChangeState_r(vp, state_save);
+#endif /* AFS_DEMAND_ATTACH_FS */
+}
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/**
+ * Wait for a vp to go offline.
+ *
+ * @param[out] ec 1 if a salvage on the volume has been requested and
+ * salvok == 0, 0 otherwise
+ * @param[in] vp The volume to wait for
+ * @param[in] salvok If 0, we return immediately with *ec = 1 if the volume
+ * has been requested to salvage. Otherwise we keep waiting
+ * until the volume has gone offline.
+ *
+ * @pre VOL_LOCK held
+ * @pre caller holds a lightweight ref on vp
+ *
+ * @note DAFS only
+ */
+static void
+VWaitForOfflineByVp_r(Error *ec, struct Volume *vp, int salvok)
+{
+ struct timespec timeout_ts;
+ const struct timespec *ts;
+ int timedout = 0;
+
+ ts = VOfflineTimeout(&timeout_ts);
+
+ *ec = 0;
+
+ while (!VIsOfflineState(V_attachState(vp)) && !timedout) {
+ if (!salvok && vp->salvage.requested) {
+ *ec = 1;
+ return;
+ }
+ VTimedWaitStateChange_r(vp, ts, &timedout);
+ }
+ if (!timedout) {
+ /* we didn't time out, so the volume must be offline, so we're done */
+ return;
+ }
+
+ /* If we got here, we timed out waiting for the volume to go offline.
+ * Kick off the accessing RX calls and wait again */
+
+ VScanCalls_r(vp);
+
+ while (!VIsOfflineState(V_attachState(vp))) {
+ if (!salvok && vp->salvage.requested) {
+ *ec = 1;
+ return;
+ }
+
+ VWaitStateChange_r(vp);
+ }
+}
+
+#else /* AFS_DEMAND_ATTACH_FS */
+
+/**
+ * Wait for a volume to go offline.
+ *
+ * @pre VOL_LOCK held
+ *
+ * @note non-DAFS only (for DAFS, use @see VWaitForOfflineByVp_r)
+ */
+static void
+VWaitForOffline_r(Error *ec, VolumeId volid)
+{
+ struct Volume *vp;
+ const struct timespec *ts;
+#ifdef AFS_PTHREAD_ENV
+ struct timespec timeout_ts;
+#endif
+
+ ts = VOfflineTimeout(&timeout_ts);
+
+ vp = GetVolume(ec, NULL, volid, NULL, ts);
+ if (!vp) {
+ /* error occurred so bad that we can't even get a vp; we have no
+ * information on the vol so we don't know whether to wait, so just
+ * return */
+ return;
+ }
+ if (!VIsGoingOffline_r(vp)) {
+ /* volume is no longer going offline, so we're done */
+ VPutVolume_r(vp);
+ return;
+ }
+
+ /* If we got here, we timed out waiting for the volume to go offline.
+ * Kick off the accessing RX calls and wait again */
+
+ VScanCalls_r(vp);
+ VPutVolume_r(vp);
+ vp = NULL;
+
+ vp = VGetVolume_r(ec, volid);
+ if (vp) {
+ /* In case it was reattached... */
+ VPutVolume_r(vp);
+ }
+}
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
/* The opposite of VAttachVolume. The volume header is written to disk, with
the inUse bit turned off. A copy of the header is maintained in memory,
however (which is why this is VOffline, not VDetach).
void
VOffline_r(Volume * vp, char *message)
{
-#ifndef AFS_DEMAND_ATTACH_FS
Error error;
+#ifndef AFS_DEMAND_ATTACH_FS
VolumeId vid = V_id(vp);
#endif
- assert(programType != volumeUtility && programType != volumeServer);
+ osi_Assert(programType != volumeUtility && programType != volumeServer);
if (!V_inUse(vp)) {
VPutVolume_r(vp);
return;
VChangeState_r(vp, VOL_STATE_GOING_OFFLINE);
VCreateReservation_r(vp);
VPutVolume_r(vp);
-
- /* wait for the volume to go offline */
- if (V_attachState(vp) == VOL_STATE_GOING_OFFLINE) {
- VWaitStateChange_r(vp);
- }
+ VWaitForOfflineByVp_r(&error, vp, 1);
VCancelReservation_r(vp);
#else /* AFS_DEMAND_ATTACH_FS */
VPutVolume_r(vp);
- vp = VGetVolume_r(&error, vid); /* Wait for it to go offline */
- if (vp) /* In case it was reattached... */
- VPutVolume_r(vp);
+ VWaitForOffline_r(&error, vid);
#endif /* AFS_DEMAND_ATTACH_FS */
}
void
VOfflineForVolOp_r(Error *ec, Volume *vp, char *message)
{
- assert(vp->pending_vol_op);
+ int salvok = 1;
+ osi_Assert(vp->pending_vol_op);
if (!V_inUse(vp)) {
VPutVolume_r(vp);
*ec = 1;
VCreateReservation_r(vp);
VPutVolume_r(vp);
- /* Wait for the volume to go offline */
- while (!VIsOfflineState(V_attachState(vp))) {
+ if (vp->pending_vol_op->com.programType != salvageServer) {
/* do not give corrupted volumes to the volserver */
- if (vp->salvage.requested && vp->pending_vol_op->com.programType != salvageServer) {
- *ec = 1;
- goto error;
- }
- VWaitStateChange_r(vp);
+ salvok = 0;
}
- *ec = 0;
- error:
+
+ *ec = 0;
+ VWaitForOfflineByVp_r(ec, vp, salvok);
+
VCancelReservation_r(vp);
}
#endif /* AFS_DEMAND_ATTACH_FS */
void
VDetachVolume_r(Error * ec, Volume * vp)
{
+#ifdef FSSYNC_BUILD_CLIENT
VolumeId volume;
struct DiskPartition64 *tpartp;
int notifyServer = 0;
int useDone = FSYNC_VOL_ON;
- *ec = 0; /* always "succeeds" */
if (VCanUseFSSYNC()) {
notifyServer = vp->needsPutBack;
if (V_destroyMe(vp) == DESTROY_ME)
- useDone = FSYNC_VOL_DONE;
-#ifdef AFS_DEMAND_ATTACH_FS
+ useDone = FSYNC_VOL_LEAVE_OFF;
+# ifdef AFS_DEMAND_ATTACH_FS
else if (!V_blessed(vp) || !V_inService(vp))
useDone = FSYNC_VOL_LEAVE_OFF;
-#endif
+# endif
+ }
+# ifdef AFS_DEMAND_ATTACH_FS
+ if (V_needsSalvaged(vp)) {
+ notifyServer = 0;
+ VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, 0);
}
+# endif
tpartp = vp->partition;
volume = V_id(vp);
+#endif /* FSSYNC_BUILD_CLIENT */
+
+ *ec = 0; /* always "succeeds" */
DeleteVolumeFromHashTable(vp);
vp->shuttingDown = 1;
#ifdef AFS_DEMAND_ATTACH_FS
VLRU_Delete_r(vp);
VChangeState_r(vp, VOL_STATE_SHUTTING_DOWN);
#else
- if (programType != fileServer)
+ if (programType != fileServer)
V_inUse(vp) = 0;
#endif /* AFS_DEMAND_ATTACH_FS */
VPutVolume_r(vp);
*/
#ifdef FSSYNC_BUILD_CLIENT
if (VCanUseFSSYNC() && notifyServer) {
- /*
- * Note: The server is not notified in the case of a bogus volume
- * explicitly to make it possible to create a volume, do a partial
- * restore, then abort the operation without ever putting the volume
- * online. This is essential in the case of a volume move operation
- * between two partitions on the same server. In that case, there
- * would be two instances of the same volume, one of them bogus,
- * which the file server would attempt to put on line
+ if (notifyServer == VOL_PUTBACK_DELETE) {
+ /* Only send FSYNC_VOL_DONE if the volume was actually deleted.
+ * volserver code will set needsPutBack to VOL_PUTBACK_DELETE
+ * to signify a deleted volume. */
+ useDone = FSYNC_VOL_DONE;
+ }
+ /*
+ * Note: The server is not notified in the case of a bogus volume
+ * explicitly to make it possible to create a volume, do a partial
+ * restore, then abort the operation without ever putting the volume
+ * online. This is essential in the case of a volume move operation
+ * between two partitions on the same server. In that case, there
+ * would be two instances of the same volume, one of them bogus,
+ * which the file server would attempt to put on line
*/
FSYNC_VolOp(volume, tpartp->name, useDone, 0, NULL);
/* XXX this code path is only hit by volume utilities, thus
VolState state_save;
state_save = VChangeState_r(vp, VOL_STATE_OFFLINING);
+
+ VOL_UNLOCK;
#endif
- /* demand attach fs
- *
- * XXX need to investigate whether we can perform
- * DFlushVolume outside of vol_glock_mutex...
- *
- * VCloseVnodeFiles_r drops the glock internally */
- DFlushVolume(V_id(vp));
+ DFlushVolume(vp->hashid);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+#endif
+
+ /* DAFS: VCloseVnodeFiles_r drops the glock internally */
VCloseVnodeFiles_r(vp);
#ifdef AFS_DEMAND_ATTACH_FS
IH_REALLYCLOSE(vp->linkHandle);
#ifdef AFS_DEMAND_ATTACH_FS
+ if ((V_attachFlags(vp) & VOL_LOCKED)) {
+ VUnlockVolume(vp);
+ }
+
VOL_LOCK;
VChangeState_r(vp, state_save);
#endif
/* For both VForceOffline and VOffline, we close all relevant handles.
* For VOffline, if we re-attach the volume, the files may possible be
- * different than before.
+ * different than before.
*/
/* for demand attach, caller MUST hold a ref count on vp */
static void
VolState state_save;
state_save = VChangeState_r(vp, VOL_STATE_DETACHING);
+
+ VOL_UNLOCK;
#endif
- /* XXX need to investigate whether we can perform
- * DFlushVolume outside of vol_glock_mutex... */
- DFlushVolume(V_id(vp));
+ DFlushVolume(vp->hashid);
- VReleaseVnodeFiles_r(vp); /* releases the glock internally */
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+#endif
+
+ VReleaseVnodeFiles_r(vp); /* DAFS: releases the glock internally */
#ifdef AFS_DEMAND_ATTACH_FS
VOL_UNLOCK;
IH_RELEASE(vp->linkHandle);
#ifdef AFS_DEMAND_ATTACH_FS
+ if ((V_attachFlags(vp) & VOL_LOCKED)) {
+ VUnlockVolume(vp);
+ }
+
VOL_LOCK;
VChangeState_r(vp, state_save);
#endif
if (*ec) {
Log("VUpdateVolume: error updating volume header, volume %u (%s)\n",
V_id(vp), V_name(vp));
- /* try to update on-disk header,
+ /* try to update on-disk header,
* while preventing infinite recursion */
if (!(flags & VOL_UPDATE_NOFORCEOFF)) {
VForceOffline_r(vp, VOL_FORCEOFF_NOUPDATE);
VOL_UNLOCK;
#endif
fdP = IH_OPEN(V_diskDataHandle(vp));
- assert(fdP != NULL);
+ osi_Assert(fdP != NULL);
code = FDH_SYNC(fdP);
- assert(code == 0);
+ osi_Assert(code == 0);
FDH_CLOSE(fdP);
#ifdef AFS_DEMAND_ATTACH_FS
VOL_LOCK;
* returns 1 if volume was freed, 0 otherwise */
#ifdef AFS_DEMAND_ATTACH_FS
static int
-VCheckDetach(register Volume * vp)
+VCheckDetach(Volume * vp)
{
int ret = 0;
Error ec = 0;
VCheckSalvage(vp);
ReallyFreeVolume(vp);
if (programType == fileServer) {
- assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
+ CV_BROADCAST(&vol_put_volume_cond);
}
}
return ret;
}
#else /* AFS_DEMAND_ATTACH_FS */
static int
-VCheckDetach(register Volume * vp)
+VCheckDetach(Volume * vp)
{
int ret = 0;
Error ec = 0;
ReallyFreeVolume(vp);
if (programType == fileServer) {
#if defined(AFS_PTHREAD_ENV)
- assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
+ CV_BROADCAST(&vol_put_volume_cond);
#else /* AFS_PTHREAD_ENV */
LWP_NoYieldSignal(VPutVolume);
#endif /* AFS_PTHREAD_ENV */
* return 1 if volume went offline, 0 otherwise */
#ifdef AFS_DEMAND_ATTACH_FS
static int
-VCheckOffline(register Volume * vp)
+VCheckOffline(Volume * vp)
{
int ret = 0;
if (vp->goingOffline && !vp->nUsers) {
Error error;
- assert(programType == fileServer);
- assert((V_attachState(vp) != VOL_STATE_ATTACHED) &&
+ osi_Assert(programType == fileServer);
+ osi_Assert((V_attachState(vp) != VOL_STATE_ATTACHED) &&
(V_attachState(vp) != VOL_STATE_FREED) &&
(V_attachState(vp) != VOL_STATE_PREATTACHED) &&
- (V_attachState(vp) != VOL_STATE_UNATTACHED));
+ (V_attachState(vp) != VOL_STATE_UNATTACHED) &&
+ (V_attachState(vp) != VOL_STATE_DELETED));
/* valid states:
*
}
#else /* AFS_DEMAND_ATTACH_FS */
static int
-VCheckOffline(register Volume * vp)
+VCheckOffline(Volume * vp)
{
int ret = 0;
if (vp->goingOffline && !vp->nUsers) {
Error error;
- assert(programType == fileServer);
+ osi_Assert(programType == fileServer);
ret = 1;
vp->goingOffline = 0;
VUpdateVolume_r(&error, vp, 0);
VCloseVolumeHandles_r(vp);
if (LogLevel) {
- Log("VOffline: Volume %u (%s) is now offline", V_id(vp),
- V_name(vp));
- if (V_offlineMessage(vp)[0])
- Log(" (%s)", V_offlineMessage(vp));
- Log("\n");
+ if (V_offlineMessage(vp)[0]) {
+ Log("VOffline: Volume %lu (%s) is now offline (%s)\n",
+ afs_printable_uint32_lu(V_id(vp)), V_name(vp),
+ V_offlineMessage(vp));
+ } else {
+ Log("VOffline: Volume %lu (%s) is now offline\n",
+ afs_printable_uint32_lu(V_id(vp)), V_name(vp));
+ }
}
FreeVolumeHeader(vp);
#ifdef AFS_PTHREAD_ENV
- assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
+ CV_BROADCAST(&vol_put_volume_cond);
#else /* AFS_PTHREAD_ENV */
LWP_NoYieldSignal(VPutVolume);
#endif /* AFS_PTHREAD_ENV */
* from free()ing the Volume struct during an async i/o op */
/* register with the async volume op ref counter */
-/* VCreateReservation_r moved into inline code header because it
- * is now needed in vnode.c -- tkeiser 11/20/2007
+/* VCreateReservation_r moved into inline code header because it
+ * is now needed in vnode.c -- tkeiser 11/20/2007
*/
/**
*
* @internal volume package internal use only
*
- * @pre
+ * @pre
* @arg VOL_LOCK is held
* @arg lightweight refcount held
*
void
VCancelReservation_r(Volume * vp)
{
- assert(--vp->nWaiters >= 0);
+ osi_Assert(--vp->nWaiters >= 0);
if (vp->nWaiters == 0) {
VCheckOffline(vp);
if (!VCheckDetach(vp)) {
int ret = 0;
if ((vp->nUsers == 0) &&
(vp->nWaiters == 0) &&
- !(V_attachFlags(vp) & (VOL_IN_HASH |
- VOL_ON_VBYP_LIST |
+ !(V_attachFlags(vp) & (VOL_IN_HASH |
+ VOL_ON_VBYP_LIST |
VOL_IS_BUSY |
VOL_ON_VLRU))) {
ReallyFreeVolume(vp);
/* attach a vol op info node to the volume struct */
info = (FSSYNC_VolOp_info *) malloc(sizeof(FSSYNC_VolOp_info));
- assert(info != NULL);
+ osi_Assert(info != NULL);
memcpy(info, vopinfo, sizeof(FSSYNC_VolOp_info));
vp->pending_vol_op = info;
}
/**
+ * same as VVolOpLeaveOnline_r, but does not require a volume with an attached
+ * header.
+ *
+ * @param[in] vp volume object
+ * @param[in] vopinfo volume operation info object
+ *
+ * @return whether it is safe to leave volume online
+ * @retval 0 it is NOT SAFE to leave the volume online
+ * @retval 1 it is safe to leave the volume online during the operation
+ * @retval -1 unsure; volume header is required in order to know whether or
+ * not it is safe to leave the volume online
+ *
+ * @pre VOL_LOCK is held
+ *
+ * @internal volume package internal use only
+ */
+int
+VVolOpLeaveOnlineNoHeader_r(Volume * vp, FSSYNC_VolOp_info * vopinfo)
+{
+ /* follow the logic in VVolOpLeaveOnline_r; this is the same, except
+ * assume that we don't know VolumeWriteable; return -1 if the answer
+ * depends on VolumeWriteable */
+
+ /* the op has declared itself to be running with the volume online */
+ if (vopinfo->vol_op_state == FSSYNC_VolOpRunningOnline) {
+ return 1;
+ }
+ /* read-only volume checkout; safe to leave online regardless of header */
+ if (vopinfo->com.command == FSYNC_VOL_NEEDVOLUME &&
+ vopinfo->com.reason == V_READONLY) {
+
+ return 1;
+ }
+ if (vopinfo->com.command == FSYNC_VOL_NEEDVOLUME &&
+ (vopinfo->com.reason == V_CLONE ||
+ vopinfo->com.reason == V_DUMP)) {
+
+ /* must know VolumeWriteable */
+ return -1;
+ }
+ return 0;
+}
+
+/**
* determine whether VBUSY should be set during this volume operation.
*
* @param[in] vp volume object
/* online salvager routines */
/***************************************************/
#if defined(AFS_DEMAND_ATTACH_FS)
+
+/**
+ * offline a volume to let it be salvaged.
+ *
+ * @param[in] vp Volume to offline
+ *
+ * @return whether we offlined the volume successfully
+ * @retval 0 volume was not offlined
+ * @retval 1 volume is now offline
+ *
+ * @note This is similar to VCheckOffline, but slightly different. We do not
+ * deal with vp->goingOffline, and we try to avoid touching the volume
+ * header except just to set needsSalvaged
+ *
+ * @pre VOL_LOCK held
+ * @pre vp->nUsers == 0
+ * @pre V_attachState(vp) == VOL_STATE_SALVAGE_REQ
+ */
+static int
+VOfflineForSalvage_r(struct Volume *vp)
+{
+ Error error;
+
+ /* take a lightweight ref so vp cannot be freed, then wait out any
+ * exclusive-state volume operation before we change state ourselves */
+ VCreateReservation_r(vp);
+ VWaitExclusiveState_r(vp);
+
+ if (vp->nUsers || V_attachState(vp) == VOL_STATE_SALVAGING) {
+ /* Someone's using the volume, or someone got to scheduling the salvage
+ * before us. I don't think either of these should be possible, as we
+ * should gain no new heavyweight references while we're trying to
+ * salvage, but just to be sure... */
+ VCancelReservation_r(vp);
+ return 0;
+ }
+
+ VChangeState_r(vp, VOL_STATE_OFFLINING);
+
+ VLRU_Delete_r(vp);
+ if (vp->header) {
+ V_needsSalvaged(vp) = 1;
+ /* ignore error; updating needsSalvaged is just best effort */
+ VUpdateVolume_r(&error, vp, VOL_UPDATE_NOFORCEOFF);
+ }
+ VCloseVolumeHandles_r(vp);
+
+ FreeVolumeHeader(vp);
+
+ /* volume has been effectively offlined; we can mark it in the SALVAGING
+ * state now, which lets FSSYNC give it away */
+ VChangeState_r(vp, VOL_STATE_SALVAGING);
+
+ VCancelReservation_r(vp);
+
+ return 1;
+}
+
/**
* check whether a salvage needs to be performed on this volume.
*
*
* @note this is one of the event handlers called by VCancelReservation_r
*
+ * @note the caller must check if the volume needs to be freed after calling
+ * this; the volume may not have any references or be on any lists after
+ * we return, and we do not free it
+ *
* @see VCancelReservation_r
*
* @internal volume package internal use only.
*/
static int
-VCheckSalvage(register Volume * vp)
+VCheckSalvage(Volume * vp)
{
 int ret = 0;
-#ifdef SALVSYNC_BUILD_CLIENT
- if (vp->nUsers || vp->nWaiters)
+#if defined(SALVSYNC_BUILD_CLIENT) || defined(FSSYNC_BUILD_CLIENT)
+ if (vp->nUsers)
 return ret;
+ if (!vp->salvage.requested) {
+ return ret;
+ }
+
+ /* prevent recursion; some of the code below creates and removes
+ * lightweight refs, which can call VCheckSalvage */
+ if (vp->salvage.scheduling) {
+ return ret;
+ }
+ vp->salvage.scheduling = 1;
+
+ /* SALVAGE_REQ means a salvage was requested but the volume must be
+ * taken offline first (see VOfflineForSalvage_r) */
+ if (V_attachState(vp) == VOL_STATE_SALVAGE_REQ) {
+ if (!VOfflineForSalvage_r(vp)) {
+ vp->salvage.scheduling = 0;
+ return ret;
+ }
+ }
+
 if (vp->salvage.requested) {
 VScheduleSalvage_r(vp);
 ret = 1;
 }
-#endif /* SALVSYNC_BUILD_CLIENT */
+ vp->salvage.scheduling = 0;
+#endif /* SALVSYNC_BUILD_CLIENT || FSSYNC_BUILD_CLIENT */
 return ret;
}
* @param[in] flags see flags note below
*
* @note flags:
- * VOL_SALVAGE_INVALIDATE_HEADER causes volume header cache entry
+ * VOL_SALVAGE_INVALIDATE_HEADER causes volume header cache entry
* to be invalidated.
*
* @pre VOL_LOCK is held.
* @retval 0 volume salvage will occur
* @retval 1 volume salvage could not be scheduled
*
- * @note DAFS fileserver only
+ * @note DAFS only
*
- * @note this call does not synchronously schedule a volume salvage. rather,
- * it sets volume state so that when volume refcounts reach zero, a
- * volume salvage will occur. by "refcounts", we mean both nUsers and
- * nWaiters must be zero.
+ * @note in the fileserver, this call does not synchronously schedule a volume
+ * salvage. rather, it sets volume state so that when volume refcounts
+ * reach zero, a volume salvage will occur. by "refcounts", we mean both
+ * nUsers and nWaiters must be zero.
*
* @internal volume package internal use only.
*/
{
int code = 0;
/*
- * for DAFS volume utilities, transition to error state
- * (at some point in the future, we should consider
- * making volser talk to salsrv)
+ * for DAFS volume utilities that are not supposed to schedule salvages,
+ * just transition to error state instead
*/
if (!VCanScheduleSalvage()) {
VChangeState_r(vp, VOL_STATE_ERROR);
return 1;
}
- if (programType != fileServer) {
-#ifdef FSSYNC_BUILD_CLIENT
- if (VCanUseFSSYNC()) {
- /*
- * If we aren't the fileserver, tell the fileserver the volume
- * needs to be salvaged. We could directly tell the
- * salvageserver, but the fileserver keeps track of some stats
- * related to salvages, and handles some other salvage-related
- * complications for us.
- */
-
- /*
- * You might wonder why we don't check for
- * VIsSalvager(V_inUse(vp)) here, since we do check for that
- * in the fileServer case (below). The reason is that the
- * below check is done since the fileServer can't tell if a
- * salvage is still running or not when V_inUse refers to a
- * salvaging program. However, if we are a non-fileserver,
- * to get here we must have checked out the volume from the
- * fileserver and locked the partition, meaning there must
- * be no salvager running; so we just always try to salvage
- */
-
- code = FSYNC_VolOp(vp->hashid, vp->partition->name,
- FSYNC_VOL_FORCE_ERROR, FSYNC_SALVAGE, NULL);
- if (code == SYNC_OK) {
- *ec = VSALVAGING;
- return 0;
- }
- Log("VRequestSalvage: force error salvage state of volume %u"
- " denied by fileserver\n", vp->hashid);
-
- /* fall through to error condition below */
- }
-#endif /* FSSYNC_BUILD_CLIENT */
+ if (programType != fileServer && !VCanUseFSSYNC()) {
VChangeState_r(vp, VOL_STATE_ERROR);
*ec = VSALVAGE;
return 1;
vp->salvage.reason = reason;
vp->stats.last_salvage = FT_ApproxTime();
- if (vp->header && VIsSalvager(V_inUse(vp))) {
- /* Right now we can't tell for sure if this indicates a
- * salvage is running, or if a running salvage crashed, so
- * we always ERROR the volume in case a salvage is running.
- * Once we get rid of the partition lock and instead lock
- * individual volume header files for salvages, we will
- * probably be able to tell if a salvage is running, and we
- * can do away with this behavior. */
- Log("VRequestSalvage: volume %u appears to be salvaging, but we\n", vp->hashid);
- Log(" didn't request a salvage. Forcing it offline waiting for the\n");
- Log(" salvage to finish; if you are sure no salvage is running,\n");
- Log(" run a salvage manually.\n");
-
- /* make sure neither VScheduleSalvage_r nor
- * VUpdateSalvagePriority_r try to schedule another salvage */
- vp->salvage.requested = vp->salvage.scheduled = 0;
-
- /* these stats aren't correct, but doing this makes them
- * slightly closer to being correct */
- vp->stats.salvages++;
- vp->stats.last_salvage_req = FT_ApproxTime();
- IncUInt64(&VStats.salvages);
-
- VChangeState_r(vp, VOL_STATE_ERROR);
- *ec = VSALVAGE;
- code = 1;
+ /* Note that it is not possible for us to reach this point if a
+ * salvage is already running on this volume (even if the fileserver
+ * was restarted during the salvage). If a salvage were running, the
+ * salvager would have write-locked the volume header file, so when
+ * we tried to lock the volume header, the lock would have failed,
+ * and we would have failed during attachment prior to calling
+ * VRequestSalvage. So we know that we can schedule salvages without
+ * fear of a salvage already running for this volume. */
+
+ if (vp->stats.salvages < SALVAGE_COUNT_MAX) {
+
+ /* if we don't need to offline the volume, we can go directly
+ * to SALVAGING. SALVAGING says the volume is offline and is
+ * either salvaging or ready to be handed to the salvager.
+ * SALVAGE_REQ says that we want to salvage the volume, but we
+ * are waiting for it to go offline first. */
+ if (flags & VOL_SALVAGE_NO_OFFLINE) {
+ VChangeState_r(vp, VOL_STATE_SALVAGING);
+ } else {
+ VChangeState_r(vp, VOL_STATE_SALVAGE_REQ);
+ if (vp->nUsers == 0) {
+ /* normally VOfflineForSalvage_r would be called from
+ * PutVolume et al when nUsers reaches 0, but if
+ * it's already 0, just do it ourselves, since PutVolume
+ * isn't going to get called */
+ VOfflineForSalvage_r(vp);
+ }
+ }
+ /* If we are non-fileserver, we're telling the fileserver to
+ * salvage the vol, so we don't need to give it back separately. */
+ vp->needsPutBack = 0;
- } else if (vp->stats.salvages < SALVAGE_COUNT_MAX) {
- VChangeState_r(vp, VOL_STATE_SALVAGING);
*ec = VSALVAGING;
} else {
Log("VRequestSalvage: volume %u online salvaged too many times; forced offline.\n", vp->hashid);
code = 1;
}
if (flags & VOL_SALVAGE_INVALIDATE_HEADER) {
- /* Instead of ReleaseVolumeHeader, we do FreeVolumeHeader()
- so that the the next VAttachVolumeByVp_r() invocation
- of attach2() will pull in a cached header
- entry and fail, then load a fresh one from disk and attach
- it to the volume.
+ /* Instead of ReleaseVolumeHeader, we do FreeVolumeHeader()
+ so that the next VAttachVolumeByVp_r() invocation
+ of attach2() will pull in a cached header
+ entry and fail, then load a fresh one from disk and attach
+ it to the volume.
*/
FreeVolumeHeader(vp);
}
*
* @note DAFS fileserver only
*
- * @note this should be called whenever a VGetVolume fails due to a
+ * @note this should be called whenever a VGetVolume fails due to a
* pending salvage request
*
* @todo should set exclusive state and drop glock around salvsync call
now = FT_ApproxTime();
/* update the salvageserver priority queue occasionally so that
- * frequently requested volumes get moved to the head of the queue
+ * frequently requested volumes get moved to the head of the queue
*/
if ((vp->salvage.scheduled) &&
(vp->stats.last_salvage_req < (now-SALVAGE_PRIO_UPDATE_INTERVAL))) {
}
+#if defined(SALVSYNC_BUILD_CLIENT) || defined(FSSYNC_BUILD_CLIENT)
+
+/* A couple of little helper functions. These return true if we tried to
+ * use this mechanism to schedule a salvage, false if we haven't tried.
+ * If we did try a salvage then the results are contained in code.
+ */
+
+static_inline int
+try_SALVSYNC(Volume *vp, char *partName, int *code) {
#ifdef SALVSYNC_BUILD_CLIENT
+ if (VCanUseSALVSYNC()) {
+ /* we are the fileserver: send the request to the salvageserver
+ * directly over SALVSYNC */
+ Log("Scheduling salvage for volume %lu on part %s over SALVSYNC\n",
+ afs_printable_uint32_lu(vp->hashid), partName);
+
+ /* can't use V_id() since there's no guarantee
+ * we have the disk data header at this point */
+ *code = SALVSYNC_SalvageVolume(vp->hashid,
+ partName,
+ SALVSYNC_SALVAGE,
+ vp->salvage.reason,
+ vp->salvage.prio,
+ NULL);
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+static_inline int
+try_FSSYNC(Volume *vp, char *partName, int *code) {
#ifdef FSSYNC_BUILD_CLIENT
+ if (VCanUseFSSYNC()) {
+ /* not the fileserver: ask the fileserver (over FSSYNC) to mark the
+ * volume for salvage on our behalf */
+ Log("Scheduling salvage for volume %lu on part %s over FSSYNC\n",
+ afs_printable_uint32_lu(vp->hashid), partName);
+
+ /*
+ * If we aren't the fileserver, tell the fileserver the volume
+ * needs to be salvaged. We could directly tell the
+ * salvageserver, but the fileserver keeps track of some stats
+ * related to salvages, and handles some other salvage-related
+ * complications for us.
+ */
+ *code = FSYNC_VolOp(vp->hashid, partName,
+ FSYNC_VOL_FORCE_ERROR, FSYNC_SALVAGE, NULL);
+ return 1;
+ }
+#endif /* FSSYNC_BUILD_CLIENT */
+ return 0;
+}
+
/**
- * schedule a salvage with the salvage server.
+ * schedule a salvage with the salvage server or fileserver.
*
* @param[in] vp pointer to volume object
*
* @return operation status
* @retval 0 salvage scheduled successfully
- * @retval 1 salvage not scheduled, or SALVSYNC com error
+ * @retval 1 salvage not scheduled, or SALVSYNC/FSSYNC com error
*
- * @pre
+ * @pre
* @arg VOL_LOCK is held.
* @arg nUsers and nWaiters should be zero.
*
- * @post salvageserver is sent a salvage request
+ * @post salvageserver or fileserver is sent a salvage request
*
- * @note DAFS fileserver only
+ * @note If we are the fileserver, the request will be sent to the salvage
+ * server over SALVSYNC. If we are not the fileserver, the request will be
+ * sent to the fileserver over FSSYNC (FSYNC_VOL_FORCE_ERROR/FSYNC_SALVAGE).
+ *
+ * @note the caller must check if the volume needs to be freed after calling
+ * this; the volume may not have any references or be on any lists after
+ * we return, and we do not free it
+ *
+ * @note DAFS only
*
* @internal volume package internal use only.
*/
VScheduleSalvage_r(Volume * vp)
{
int ret=0;
- int code;
+ int code = 0;
VolState state_save;
VThreadOptions_t * thread_opts;
char partName[16];
+ osi_Assert(VCanUseSALVSYNC() || VCanUseFSSYNC());
+
if (vp->nWaiters || vp->nUsers) {
return 1;
}
return 1;
}
+ if (vp->salvage.scheduled) {
+ return ret;
+ }
+
+ VCreateReservation_r(vp);
+ VWaitExclusiveState_r(vp);
+
/*
* XXX the scheduling process should really be done asynchronously
* to avoid fssync deadlocks
*/
if (!vp->salvage.scheduled) {
- /* if we haven't previously scheduled a salvage, do so now
+ /* if we haven't previously scheduled a salvage, do so now
*
* set the volume to an exclusive state and drop the lock
* around the SALVSYNC call
- *
- * note that we do NOT acquire a reservation here -- doing so
- * could result in unbounded recursion
*/
- strlcpy(partName, VPartitionPath(vp->partition), sizeof(partName));
+ strlcpy(partName, vp->partition->name, sizeof(partName));
state_save = VChangeState_r(vp, VOL_STATE_SALVSYNC_REQ);
VOL_UNLOCK;
- /* can't use V_id() since there's no guarantee
- * we have the disk data header at this point */
- code = SALVSYNC_SalvageVolume(vp->hashid,
- partName,
- SALVSYNC_SALVAGE,
- vp->salvage.reason,
- vp->salvage.prio,
- NULL);
+ osi_Assert(try_SALVSYNC(vp, partName, &code) ||
+ try_FSSYNC(vp, partName, &code));
+
VOL_LOCK;
VChangeState_r(vp, state_save);
if (code == SYNC_OK) {
vp->salvage.scheduled = 1;
- vp->stats.salvages++;
vp->stats.last_salvage_req = FT_ApproxTime();
- IncUInt64(&VStats.salvages);
+ if (VCanUseSALVSYNC()) {
+ /* don't record these stats for non-fileservers; let the
+ * fileserver take care of these */
+ vp->stats.salvages++;
+ IncUInt64(&VStats.salvages);
+ }
} else {
ret = 1;
switch(code) {
case SYNC_COM_ERROR:
break;
case SYNC_DENIED:
- Log("VScheduleSalvage_r: SALVSYNC request denied\n");
+ Log("VScheduleSalvage_r: Salvage request for volume %lu "
+ "denied\n", afs_printable_uint32_lu(vp->hashid));
break;
default:
- Log("VScheduleSalvage_r: SALVSYNC unknown protocol error\n");
+ Log("VScheduleSalvage_r: Salvage request for volume %lu "
+ "received unknown protocol error %d\n",
+ afs_printable_uint32_lu(vp->hashid), code);
break;
}
+
+ if (VCanUseFSSYNC()) {
+ VChangeState_r(vp, VOL_STATE_ERROR);
+ }
}
}
+
+ /* NB: this is cancelling the reservation we obtained above, but we do
+ * not call VCancelReservation_r, since that may trigger the vp dtor,
+ * possibly free'ing the vp. We need to keep the vp around after
+ * this, as the caller may reference vp without any refs. Instead, it
+ * is the duty of the caller to inspect 'vp' after we return to see if
+ * needs to be freed. */
+ osi_Assert(--vp->nWaiters >= 0);
return ret;
}
+#endif /* SALVSYNC_BUILD_CLIENT || FSSYNC_BUILD_CLIENT */
+
+#ifdef SALVSYNC_BUILD_CLIENT
/**
* connect to the salvageserver SYNC service.
* @return operation status
* @retval 0 success
*
- * @pre
+ * @pre
* @arg VOL_LOCK is held.
* @arg client should have a live connection to the salvageserver.
*
*/
int
VDisconnectSALV_r(void)
-{
+{
+ /* tear down the client end of the salvageserver SYNC channel */
 return SALVSYNC_clientFinis();
}
* @retval 0 failure
* @retval 1 success
*
- * @pre
+ * @pre
* @arg VOL_LOCK is held.
* @arg client should have a live connection to the salvageserver.
*
* @retval 0 failure
* @retval 1 success
*
- * @pre
+ * @pre
* @arg VInit must equal 2.
* @arg Program Type must not be fileserver or salvager.
*
* @retval 0 failure
* @retval 1 success
*
- * @pre
+ * @pre
* @arg VInit must equal 2.
* @arg Program Type must not be fileserver or salvager.
* @arg VOL_LOCK is held.
VConnectFS_r(void)
{
int rc;
- assert((VInit == 2) &&
+ osi_Assert((VInit == 2) &&
(programType != fileServer) &&
(programType != salvager));
rc = FSYNC_clientInit();
- if (rc)
- VInit = 3;
+ if (rc) {
+ VSetVInit_r(3);
+ }
return rc;
}
/**
* disconnect from the fileserver SYNC service.
*
- * @pre
+ * @pre
* @arg client should have a live connection to the fileserver.
* @arg VOL_LOCK is held.
* @arg Program Type must not be fileserver or salvager.
void
VDisconnectFS_r(void)
{
- assert((programType != fileServer) &&
+ osi_Assert((programType != fileServer) &&
 (programType != salvager));
 FSYNC_clientFinis();
+ /* drop init level back to 2, the pre-connect state required by
+ * VConnectFS_r */
- VInit = 2;
+ VSetVInit_r(2);
}
/**
/* volume bitmap routines */
/***************************************************/
+/**
+ * allocate a vnode bitmap number for the vnode
+ *
+ * @param[out] ec error code
+ * @param[in] vp volume object pointer
+ * @param[in] index vnode index number for the vnode
+ * @param[in] flags flag values described in note
+ *
+ * @note for DAFS, flags parameter controls locking behavior.
+ * If (flags & VOL_ALLOC_BITMAP_WAIT) is set, then this function
+ * will create a reservation and block on any other exclusive
+ * operations. Otherwise, this function assumes the caller
+ * already has exclusive access to vp, and we just change the
+ * volume state.
+ *
+ * @pre VOL_LOCK held
+ *
+ * @return bit number allocated
+ */
/*
- * For demand attach fs, flags parameter controls
- * locking behavior. If (flags & VOL_ALLOC_BITMAP_WAIT)
- * is set, then this function will create a reservation
- * and block on any other exclusive operations. Otherwise,
- * this function assumes the caller already has exclusive
- * access to vp, and we just change the volume state.
- */
-VnodeId
-VAllocBitmapEntry_r(Error * ec, Volume * vp,
+
+ */
+int
+VAllocBitmapEntry_r(Error * ec, Volume * vp,
struct vnodeIndex *index, int flags)
{
- VnodeId ret;
- register byte *bp, *ep;
+ int ret = 0;
+ byte *bp, *ep;
#ifdef AFS_DEMAND_ATTACH_FS
VolState state_save;
#endif /* AFS_DEMAND_ATTACH_FS */
/* This test is probably redundant */
if (!VolumeWriteable(vp)) {
*ec = (bit32) VREADONLY;
- return 0;
+ return ret;
}
#ifdef AFS_DEMAND_ATTACH_FS
vp->shuttingDown = 1; /* Let who has it free it. */
vp->specialStatus = 0;
#endif /* AFS_DEMAND_ATTACH_FS */
- ret = NULL;
goto done;
}
}
bp++;
o = ffs(~*bp) - 1; /* ffs is documented in BSTRING(3) */
*bp |= (1 << o);
- ret = (VnodeId) ((bp - index->bitmap) * 8 + o);
+ ret = ((bp - index->bitmap) * 8 + o);
#ifdef AFS_DEMAND_ATTACH_FS
VOL_LOCK;
#endif /* AFS_DEMAND_ATTACH_FS */
/* No bit map entry--must grow bitmap */
bp = (byte *)
realloc(index->bitmap, index->bitmapSize + VOLUME_BITMAP_GROWSIZE);
- assert(bp != NULL);
+ osi_Assert(bp != NULL);
index->bitmap = bp;
bp += index->bitmapSize;
memset(bp, 0, VOLUME_BITMAP_GROWSIZE);
return ret;
}
-VnodeId
-VAllocBitmapEntry(Error * ec, Volume * vp, register struct vnodeIndex * index)
+int
+VAllocBitmapEntry(Error * ec, Volume * vp, struct vnodeIndex * index)
{
- VnodeId retVal;
+ int retVal;
VOL_LOCK;
retVal = VAllocBitmapEntry_r(ec, vp, index, VOL_ALLOC_BITMAP_WAIT);
VOL_UNLOCK;
}
void
-VFreeBitMapEntry_r(Error * ec, register struct vnodeIndex *index,
- unsigned bitNumber)
+VFreeBitMapEntry_r(Error * ec, Volume *vp, struct vnodeIndex *index,
+ unsigned bitNumber, int flags)
{
unsigned int offset;
*ec = 0;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (flags & VOL_FREE_BITMAP_WAIT) {
+ /* VAllocBitmapEntry_r allocs bitmap entries under an exclusive volume
+ * state, so ensure we're not in an exclusive volume state when we update
+ * the bitmap */
+ VCreateReservation_r(vp);
+ VWaitExclusiveState_r(vp);
+ }
+#endif
+
#ifdef BITMAP_LATER
if (!index->bitmap)
- return;
+ goto done;
#endif /* BITMAP_LATER */
+
offset = bitNumber >> 3;
if (offset >= index->bitmapSize) {
*ec = VNOVNODE;
- return;
+ goto done;
}
if (offset < index->bitmapOffset)
index->bitmapOffset = offset & ~3; /* Truncate to nearest bit32 */
*(index->bitmap + offset) &= ~(1 << (bitNumber & 0x7));
+
+ done:
+#ifdef AFS_DEMAND_ATTACH_FS
+ VCancelReservation_r(vp);
+#endif
+ return; /* make the compiler happy for non-DAFS */
}
+/* locked wrapper for VFreeBitMapEntry_r; takes VOL_LOCK around the call */
void
-VFreeBitMapEntry(Error * ec, register struct vnodeIndex *index,
+VFreeBitMapEntry(Error * ec, Volume *vp, struct vnodeIndex *index,
 unsigned bitNumber)
{
 VOL_LOCK;
- VFreeBitMapEntry_r(ec, index, bitNumber);
+ VFreeBitMapEntry_r(ec, vp, index, bitNumber, VOL_FREE_BITMAP_WAIT);
 VOL_UNLOCK;
}
VGetBitmap_r(Error * ec, Volume * vp, VnodeClass class)
{
StreamHandle_t *file;
- int nVnodes;
- int size;
+ afs_sfsize_t nVnodes, size;
struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
struct vnodeIndex *vip = &vp->vnodeIndex[class];
struct VnodeDiskObject *vnode;
VOL_UNLOCK;
fdP = IH_OPEN(vip->handle);
- assert(fdP != NULL);
+ osi_Assert(fdP != NULL);
file = FDH_FDOPEN(fdP, "r");
- assert(file != NULL);
+ osi_Assert(file != NULL);
vnode = (VnodeDiskObject *) malloc(vcp->diskSize);
- assert(vnode != NULL);
+ osi_Assert(vnode != NULL);
size = OS_SIZE(fdP->fd_fd);
- assert(size != -1);
+ osi_Assert(size != -1);
nVnodes = (size <= vcp->diskSize ? 0 : size - vcp->diskSize)
>> vcp->logSize;
vip->bitmapSize = ((nVnodes / 8) + 10) / 4 * 4; /* The 10 is a little extra so
* it that way */
#ifdef BITMAP_LATER
BitMap = (byte *) calloc(1, vip->bitmapSize);
- assert(BitMap != NULL);
+ osi_Assert(BitMap != NULL);
#else /* BITMAP_LATER */
vip->bitmap = (byte *) calloc(1, vip->bitmapSize);
- assert(vip->bitmap != NULL);
+ osi_Assert(vip->bitmap != NULL);
vip->bitmapOffset = 0;
#endif /* BITMAP_LATER */
- if (STREAM_SEEK(file, vcp->diskSize, 0) != -1) {
+ if (STREAM_ASEEK(file, vcp->diskSize) != -1) {
int bitNumber = 0;
for (bitNumber = 0; bitNumber < nVnodes + 100; bitNumber++) {
if (STREAM_READ(vnode, vcp->diskSize, 1, file) != 1)
struct DiskPartition64 *dp;
*ec = 0;
- name[0] = '/';
- (void)afs_snprintf(&name[1], (sizeof name) - 1, VFORMAT, afs_printable_uint32_lu(volumeId));
+ name[0] = OS_DIRSEPC;
+ snprintf(&name[1], (sizeof name) - 1, VFORMAT,
+ afs_printable_uint32_lu(volumeId));
for (dp = DiskPartitionList; dp; dp = dp->next) {
- struct afs_stat status;
+ struct afs_stat_st status;
strcpy(path, VPartitionPath(dp));
strcat(path, name);
if (afs_stat(path, &status) == 0) {
* @return volume number
*
* @note the string must be of the form VFORMAT. the only permissible
- * deviation is a leading '/' character.
+ * deviation is a leading OS_DIRSEPC character.
*
* @see VFORMAT
*/
int
VolumeNumber(char *name)
{
- if (*name == '/')
+ if (*name == OS_DIRSEPC)
 name++;
+ /* name+1 skips the leading 'V' of a VFORMAT name ("V<id>.vol");
+ * parse the decimal volume id that follows */
- return atoi(name + 1);
+ return strtoul(name + 1, NULL, 10);
}
/**
VolumeExternalName(VolumeId volumeId)
{
static char name[VMAXPATHLEN];
- (void)afs_snprintf(name, sizeof name, VFORMAT, afs_printable_uint32_lu(volumeId));
+ snprintf(name, sizeof name, VFORMAT, afs_printable_uint32_lu(volumeId));
return name;
}
* @see afs_snprintf
*
* @note re-entrant equivalent of VolumeExternalName
- *
- * @internal volume package internal use only.
*/
-#ifdef AFS_DEMAND_ATTACH_FS
-static int
+int
VolumeExternalName_r(VolumeId volumeId, char * name, size_t len)
{
- return afs_snprintf(name, len, VFORMAT, afs_printable_uint32_lu(volumeId));
+ /* returns what snprintf returns: the number of characters that would
+ * have been written; a value >= len indicates truncation */
+ return snprintf(name, len, VFORMAT, afs_printable_uint32_lu(volumeId));
}
-#endif
/***************************************************/
*------------------------------------------------------------------------*/
int
-VAdjustVolumeStatistics_r(register Volume * vp)
+VAdjustVolumeStatistics_r(Volume * vp)
{
unsigned int now = FT_ApproxTime();
if (now - V_dayUseDate(vp) > OneDay) {
- register int ndays, i;
+ int ndays, i;
ndays = (now - V_dayUseDate(vp)) / OneDay;
for (i = 6; i > ndays - 1; i--)
} /*VAdjustVolumeStatistics */
int
-VAdjustVolumeStatistics(register Volume * vp)
+VAdjustVolumeStatistics(Volume * vp)
{
int retVal;
VOL_LOCK;
}
void
-VBumpVolumeUsage_r(register Volume * vp)
+VBumpVolumeUsage_r(Volume * vp)
{
unsigned int now = FT_ApproxTime();
V_accessDate(vp) = now;
}
void
-VBumpVolumeUsage(register Volume * vp)
+VBumpVolumeUsage(Volume * vp)
{
VOL_LOCK;
VBumpVolumeUsage_r(vp);
* initialization level indicates that all volumes are attached,
* which implies that all partitions are initialized. */
#ifdef AFS_PTHREAD_ENV
- sleep(10);
+ VOL_CV_WAIT(&vol_vinit_cond);
#else /* AFS_PTHREAD_ENV */
IOMGR_Sleep(10);
#endif /* AFS_PTHREAD_ENV */
sizeof(VolumeId) * updateSize);
}
}
- assert(UpdateList != NULL);
+ osi_Assert(UpdateList != NULL);
UpdateList[nUpdatedVolumes++] = V_id(vp);
#endif /* !AFS_DEMAND_ATTACH_FS */
}
static void
VScanUpdateList(void)
{
- register int i, gap;
- register Volume *vp;
+ int i, gap;
+ Volume *vp;
Error error;
afs_uint32 now = FT_ApproxTime();
/* Be careful with this code, since it works with interleaved calls to AddToVolumeUpdateList */
* in order to speed up fileserver shutdown
*
* (1) by soft detach we mean a process very similar
- * to VOffline, except the final state of the
+ * to VOffline, except the final state of the
* Volume will be VOL_STATE_PREATTACHED, instead
* of the usual VOL_STATE_UNATTACHED
*/
* @note DAFS only
*
* @note valid option parameters are:
- * @arg @c VLRU_SET_THRESH
+ * @arg @c VLRU_SET_THRESH
* set the period of inactivity after which
* volumes are eligible for soft detachment
- * @arg @c VLRU_SET_INTERVAL
+ * @arg @c VLRU_SET_INTERVAL
* set the time interval between calls
* to the volume LRU "garbage collector"
- * @arg @c VLRU_SET_MAX
+ * @arg @c VLRU_SET_MAX
* set the max number of volumes to deallocate
* in one GC pass
*/
*
* @post VLRU scanner thread internal timing parameters are computed
*
- * @note computes internal timing parameters based upon user-modifiable
+ * @note computes internal timing parameters based upon user-modifiable
* tunable parameters.
*
* @note DAFS only
queue_Init(&volume_LRU.q[i]);
volume_LRU.q[i].len = 0;
volume_LRU.q[i].busy = 0;
- assert(pthread_cond_init(&volume_LRU.q[i].cv, NULL) == 0);
+ CV_INIT(&volume_LRU.q[i].cv, "vol lru", CV_DEFAULT, 0);
}
/* setup the timing constants */
/* start up the VLRU scanner */
volume_LRU.scanner_state = VLRU_SCANNER_STATE_OFFLINE;
if (programType == fileServer) {
- assert(pthread_cond_init(&volume_LRU.cv, NULL) == 0);
- assert(pthread_attr_init(&attrs) == 0);
- assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
- assert(pthread_create(&tid, &attrs, &VLRU_ScannerThread, NULL) == 0);
+ CV_INIT(&volume_LRU.cv, "vol lru", CV_DEFAULT, 0);
+ osi_Assert(pthread_attr_init(&attrs) == 0);
+ osi_Assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
+ osi_Assert(pthread_create(&tid, &attrs, &VLRU_ScannerThread, NULL) == 0);
}
}
if (!VLRU_enabled)
return;
- assert(queue_IsNotOnQueue(&vp->vlru));
+ osi_Assert(queue_IsNotOnQueue(&vp->vlru));
vp->vlru.idx = VLRU_QUEUE_INVALID;
}
*
* @note DAFS only
*
- * @todo We should probably set volume state to something exlcusive
+ * @todo We should probably set volume state to something exclusive
* (as @c VLRU_Add_r does) prior to dropping @c VOL_LOCK.
*
* @internal volume package internal use only.
VLRU_Wait_r(&volume_LRU.q[idx]);
} while (idx != vp->vlru.idx);
- /* now remove from the VLRU and update
+ /* now remove from the VLRU and update
* the appropriate counter */
queue_Remove(&vp->vlru);
volume_LRU.q[idx].len--;
if (queue_IsNotOnQueue(&vp->vlru))
return;
- assert(V_attachFlags(vp) & VOL_ON_VLRU);
+ osi_Assert(V_attachFlags(vp) & VOL_ON_VLRU);
/* update the access timestamp */
vp->stats.last_get = FT_ApproxTime();
*
* @param[in] vp pointer to volume object
* @param[in] new_idx index of VLRU queue onto which the volume will be moved
- * @param[in] append controls whether the volume will be appended or
+ * @param[in] append controls whether the volume will be appended or
* prepended to the queue. A nonzero value means it will
* be appended; zero means it will be prepended.
*
- * @pre The new (and old, if applicable) queue(s) must either be owned
+ * @pre The new (and old, if applicable) queue(s) must either be owned
* exclusively by the calling thread for asynchronous manipulation,
* or the queue(s) must be quiescent and VOL_LOCK must be held.
* Please see VLRU_BeginExclusive_r, VLRU_EndExclusive_r and VLRU_Wait_r
queue_Remove(&vp->vlru);
volume_LRU.q[vp->vlru.idx].len--;
-
+
/* put the volume back on the correct generational queue */
if (append) {
queue_Append(&volume_LRU.q[new_idx], &vp->vlru);
afs_uint32 now, min_delay, delay;
int i, min_idx, min_op, overdue, state;
- /* set t=0 for promotion cycle to be
+ /* set t=0 for promotion cycle to be
* fileserver startup */
now = FT_ApproxTime();
for (i=0; i < VLRU_GENERATIONS-1; i++) {
/* check to see if we've been asked to pause */
if (volume_LRU.scanner_state == VLRU_SCANNER_STATE_PAUSING) {
volume_LRU.scanner_state = VLRU_SCANNER_STATE_PAUSED;
- assert(pthread_cond_broadcast(&volume_LRU.cv) == 0);
+ CV_BROADCAST(&volume_LRU.cv);
do {
VOL_CV_WAIT(&volume_LRU.cv);
} while (volume_LRU.scanner_state == VLRU_SCANNER_STATE_PAUSED);
/* signal that scanner is down */
volume_LRU.scanner_state = VLRU_SCANNER_STATE_OFFLINE;
- assert(pthread_cond_broadcast(&volume_LRU.cv) == 0);
+ CV_BROADCAST(&volume_LRU.cv);
VOL_UNLOCK;
return NULL;
}
*
* @arg The volume has been accessed since the last promotion:
* @c (vp->stats.last_get >= vp->stats.last_promote)
- * @arg The last promotion occurred at least
+ * @arg The last promotion occurred at least
* @c volume_LRU.promotion_interval[idx] seconds ago
*
* As a performance optimization, promotions are "globbed". In other
* words, we promote arbitrarily large contiguous sublists of elements
- * as one operation.
+ * as one operation.
*
* @param[in] idx VLRU queue index to scan
*
Volume ** salv_flag_vec = NULL;
int salv_vec_offset = 0;
- assert(idx == VLRU_QUEUE_MID || idx == VLRU_QUEUE_OLD);
+ osi_Assert(idx == VLRU_QUEUE_MID || idx == VLRU_QUEUE_OLD);
/* get exclusive access to two chains, and drop the glock */
VLRU_Wait_r(&volume_LRU.q[idx-1]);
* demotion passes */
if (salv_flag_vec &&
!(V_attachFlags(vp) & VOL_HDR_DONTSALV) &&
- demote &&
+ demote &&
(vp->updateTime < (now - SALVAGE_INTERVAL)) &&
(V_attachState(vp) == VOL_STATE_ATTACHED)) {
salv_flag_vec[salv_vec_offset++] = vp;
Volume * vp;
int i, locked = 1;
- assert(idx == VLRU_QUEUE_NEW || idx == VLRU_QUEUE_CANDIDATE);
+ osi_Assert(idx == VLRU_QUEUE_NEW || idx == VLRU_QUEUE_CANDIDATE);
/* gain exclusive access to the idx VLRU */
VLRU_Wait_r(&volume_LRU.q[idx]);
return ret;
}
-/* check whether volume should be made a
+/* check whether volume should be made a
* soft detach candidate */
static int
VCheckSoftDetachCandidate(Volume * vp, afs_uint32 thresh)
idx = vp->vlru.idx;
- assert(idx == VLRU_QUEUE_NEW);
+ osi_Assert(idx == VLRU_QUEUE_NEW);
if (vp->stats.last_get <= thresh) {
/* move to candidate pool */
static void
VLRU_BeginExclusive_r(struct VLRU_q * q)
{
- assert(q->busy == 0);
+ osi_Assert(q->busy == 0);
q->busy = 1;
}
static void
VLRU_EndExclusive_r(struct VLRU_q * q)
{
- assert(q->busy);
+ osi_Assert(q->busy);
q->busy = 0;
- assert(pthread_cond_broadcast(&q->cv) == 0);
+ CV_BROADCAST(&q->cv);
}
/* wait for another thread to end exclusive access on VLRU */
afs_uint32 ts_save;
int ret = 0;
- assert(vp->vlru.idx == VLRU_QUEUE_CANDIDATE);
+ osi_Assert(vp->vlru.idx == VLRU_QUEUE_CANDIDATE);
ts_save = vp->stats.last_get;
if (ts_save > thresh)
case VOL_STATE_GOING_OFFLINE:
case VOL_STATE_SHUTTING_DOWN:
case VOL_STATE_SALVAGING:
+ case VOL_STATE_DELETED:
volume_LRU.q[vp->vlru.idx].len--;
/* create and cancel a reservation to
/* vhold drops the glock, so now we should
* check to make sure we aren't racing against
* other threads. if we are racing, offlining vp
- * would be wasteful, and block the scanner for a while
+ * would be wasteful, and block the scanner for a while
*/
- if (vp->nWaiters ||
+ if (vp->nWaiters ||
(vp->nUsers > 1) ||
(vp->shuttingDown) ||
(vp->goingOffline) ||
vp = NULL;
} else {
/* pull it off the VLRU */
- assert(vp->vlru.idx == VLRU_QUEUE_CANDIDATE);
+ osi_Assert(vp->vlru.idx == VLRU_QUEUE_CANDIDATE);
volume_LRU.q[VLRU_QUEUE_CANDIDATE].len--;
queue_Remove(&vp->vlru);
vp->vlru.idx = VLRU_QUEUE_INVALID;
/* Volume Header Cache routines */
/***************************************************/
-/**
+/**
* volume header cache.
*/
struct volume_hdr_LRU_t volume_hdr_LRU;
*
* @pre VOL_LOCK held. Function has never been called before.
*
- * @post howMany cache entries are allocated, initialized, and added
+ * @post howMany cache entries are allocated, initialized, and added
* to the LRU list. Header cache statistics are initialized.
*
* @note only applicable to fileServer program type. Should only be
static void
VInitVolumeHeaderCache(afs_uint32 howMany)
{
- register struct volHeader *hp;
+ struct volHeader *hp;
if (programType != fileServer)
return;
queue_Init(&volume_hdr_LRU);
volume_hdr_LRU.stats.used = howMany;
volume_hdr_LRU.stats.attached = 0;
hp = (struct volHeader *)(calloc(howMany, sizeof(struct volHeader)));
- assert(hp != NULL);
+ osi_Assert(hp != NULL);
while (howMany--)
/* We are using ReleaseVolumeHeader to initialize the values on the header list
*
* @pre VOL_LOCK held. For DAFS, lightweight ref must be held on volume object.
*
- * @post volume header attached to volume object. if necessary, header cache
+ * @post volume header attached to volume object. if necessary, header cache
* entry on LRU is synchronized to disk. Header is removed from LRU list.
*
* @note VOL_LOCK may be dropped
* @internal volume package internal use only.
*/
static int
-GetVolumeHeader(register Volume * vp)
+GetVolumeHeader(Volume * vp)
{
Error error;
- register struct volHeader *hd;
+ struct volHeader *hd;
int old;
static int everLogged = 0;
/* for volume utilities, we allocate volHeaders as needed */
if (!vp->header) {
hd = (struct volHeader *)calloc(1, sizeof(*vp->header));
- assert(hd != NULL);
+ osi_Assert(hd != NULL);
vp->header = hd;
hd->back = vp;
#ifdef AFS_DEMAND_ATTACH_FS
* still available. pull it off the lru and return */
hd = vp->header;
queue_Remove(hd);
- assert(hd->back == vp);
+ osi_Assert(hd->back == vp);
+#ifdef AFS_DEMAND_ATTACH_FS
+ V_attachFlags(vp) &= ~(VOL_HDR_IN_LRU);
+#endif
} else {
/* we need to grab a new element off the LRU */
if (queue_IsNotEmpty(&volume_hdr_LRU)) {
hd = queue_First(&volume_hdr_LRU, volHeader);
queue_Remove(hd);
} else {
- /* LRU is empty, so allocate a new volHeader
+ /* LRU is empty, so allocate a new volHeader
* this is probably indicative of a leak, so let the user know */
hd = (struct volHeader *)calloc(1, sizeof(struct volHeader));
- assert(hd != NULL);
+ osi_Assert(hd != NULL);
if (!everLogged) {
Log("****Allocated more volume headers, probably leak****\n");
everLogged = 1;
volume_hdr_LRU.stats.free++;
}
if (hd->back) {
- /* this header used to belong to someone else.
+ /* this header used to belong to someone else.
* we'll need to check if the header needs to
* be sync'd out to disk */
#ifdef AFS_DEMAND_ATTACH_FS
/* if hd->back were in an exclusive state, then
* its volHeader would not be on the LRU... */
- assert(!VIsExclusiveState(V_attachState(hd->back)));
+ osi_Assert(!VIsExclusiveState(V_attachState(hd->back)));
#endif
if (hd->diskstuff.inUse) {
* @internal volume package internal use only.
*/
static void
-ReleaseVolumeHeader(register struct volHeader *hd)
+ReleaseVolumeHeader(struct volHeader *hd)
{
if (programType != fileServer)
return;
* @internal volume package internal use only.
*/
static void
-FreeVolumeHeader(register Volume * vp)
+FreeVolumeHeader(Volume * vp)
{
- register struct volHeader *hd = vp->header;
+ struct volHeader *hd = vp->header;
if (!hd)
return;
if (programType == fileServer) {
*
* @post Volume Hash Table will have 2^logsize buckets
*/
-int
+int
VSetVolHashSize(int logsize)
{
- /* 64 to 16384 hash buckets seems like a reasonable range */
- if ((logsize < 6 ) || (logsize > 14)) {
+ /* 64 to 268435456 hash buckets seems like a reasonable range */
+ if ((logsize < 6 ) || (logsize > 28)) {
return -1;
}
-
+
if (!VInit) {
VolumeHashTable.Size = 1 << logsize;
VolumeHashTable.Mask = VolumeHashTable.Size - 1;
static void
VInitVolumeHash(void)
{
- register int i;
+ int i;
- VolumeHashTable.Table = (VolumeHashChainHead *) calloc(VolumeHashTable.Size,
+ VolumeHashTable.Table = (VolumeHashChainHead *) calloc(VolumeHashTable.Size,
sizeof(VolumeHashChainHead));
- assert(VolumeHashTable.Table != NULL);
-
+ osi_Assert(VolumeHashTable.Table != NULL);
+
for (i=0; i < VolumeHashTable.Size; i++) {
queue_Init(&VolumeHashTable.Table[i]);
#ifdef AFS_DEMAND_ATTACH_FS
- assert(pthread_cond_init(&VolumeHashTable.Table[i].chain_busy_cv, NULL) == 0);
+ CV_INIT(&VolumeHashTable.Table[i].chain_busy_cv, "vhash busy", CV_DEFAULT, 0);
#endif /* AFS_DEMAND_ATTACH_FS */
}
}
* asynchronous hash chain reordering to finish.
*/
static void
-AddVolumeToHashTable(register Volume * vp, int hashid)
+AddVolumeToHashTable(Volume * vp, int hashid)
{
VolumeHashChainHead * head;
* asynchronous hash chain reordering to finish.
*/
static void
-DeleteVolumeFromHashTable(register Volume * vp)
+DeleteVolumeFromHashTable(Volume * vp)
{
VolumeHashChainHead * head;
*
* @param[out] ec error code return
* @param[in] volumeId volume id
- * @param[in] hint volume object which we believe could be the correct
+ * @param[in] hint volume object which we believe could be the correct
mapping
*
* @return volume object pointer
* @retval NULL no such volume id is registered with the hash table.
*
- * @pre VOL_LOCK is held. For DAFS, caller must hold a lightweight
+ * @pre VOL_LOCK is held. For DAFS, caller must hold a lightweight
ref on hint.
*
- * @post volume object with the given id is returned. volume object and
- * hash chain access statistics are updated. hash chain may have
+ * @post volume object with the given id is returned. volume object and
+ * hash chain access statistics are updated. hash chain may have
* been reordered.
*
- * @note For DAFS, VOL_LOCK may be dropped in order to wait for an
- * asynchronous hash chain reordering operation to finish, or
+ * @note For DAFS, VOL_LOCK may be dropped in order to wait for an
+ * asynchronous hash chain reordering operation to finish, or
* in order for us to perform an asynchronous chain reordering.
*
- * @note Hash chain reorderings occur when the access count for the
- * volume object being looked up exceeds the sum of the previous
- * node's (the node ahead of it in the hash chain linked list)
+ * @note Hash chain reorderings occur when the access count for the
+ * volume object being looked up exceeds the sum of the previous
+ * node's (the node ahead of it in the hash chain linked list)
* access count plus the constant VOLUME_HASH_REORDER_THRESHOLD.
*
- * @note For DAFS, the hint parameter allows us to short-circuit if the
- * cacheCheck fields match between the hash chain head and the
+ * @note For DAFS, the hint parameter allows us to short-circuit if the
+ * cacheCheck fields match between the hash chain head and the
* hint volume object.
*/
Volume *
VLookupVolume_r(Error * ec, VolId volumeId, Volume * hint)
{
- register int looks = 0;
+ int looks = 0;
Volume * vp, *np;
#ifdef AFS_DEMAND_ATTACH_FS
Volume *pp;
#endif /* AFS_DEMAND_ATTACH_FS */
/* someday we need to either do per-chain locks, RWlocks,
- * or both for volhash access.
+ * or both for volhash access.
* (and move to a data structure with better cache locality) */
/* search the chain for this volume id */
/* update the short-circuit cache check */
vp->chainCacheCheck = head->cacheCheck;
}
-#endif /* AFS_DEMAND_ATTACH_FS */
+#endif /* AFS_DEMAND_ATTACH_FS */
return vp;
}
static void
VHashBeginExclusive_r(VolumeHashChainHead * head)
{
- assert(head->busy == 0);
+ osi_Assert(head->busy == 0);
head->busy = 1;
}
static void
VHashEndExclusive_r(VolumeHashChainHead * head)
{
- assert(head->busy);
+ osi_Assert(head->busy);
head->busy = 0;
- assert(pthread_cond_broadcast(&head->chain_busy_cv) == 0);
+ CV_BROADCAST(&head->chain_busy_cv);
}
/**
* @note This interface should be called before any attempt to
* traverse the hash chain. It is permissible for a thread
* to gain exclusive access to the chain, and then perform
- * latent operations on the chain asynchronously wrt the
+ * latent operations on the chain asynchronously wrt the
* VOL_LOCK.
*
* @warning if waiting is necessary, VOL_LOCK is dropped
static void
VVByPListBeginExclusive_r(struct DiskPartition64 * dp)
{
- assert(dp->vol_list.busy == 0);
+ osi_Assert(dp->vol_list.busy == 0);
dp->vol_list.busy = 1;
}
static void
VVByPListEndExclusive_r(struct DiskPartition64 * dp)
{
- assert(dp->vol_list.busy);
+ osi_Assert(dp->vol_list.busy);
dp->vol_list.busy = 0;
- assert(pthread_cond_broadcast(&dp->vol_list.cv) == 0);
+ CV_BROADCAST(&dp->vol_list.cv);
}
/**
* @note This interface should be called before any attempt to
* traverse the VByPList. It is permissible for a thread
* to gain exclusive access to the list, and then perform
- * latent operations on the list asynchronously wrt the
+ * latent operations on the list asynchronously wrt the
* VOL_LOCK.
*
* @warning if waiting is necessary, VOL_LOCK is dropped
void
VPrintCacheStats_r(void)
{
- afs_uint32 get_hi, get_lo, load_hi, load_lo;
- register struct VnodeClassInfo *vcp;
+ struct VnodeClassInfo *vcp;
vcp = &VnodeClassInfo[vLarge];
Log("Large vnode cache, %d entries, %d allocs, %d gets (%d reads), %d writes\n", vcp->cacheSize, vcp->allocs, vcp->gets, vcp->reads, vcp->writes);
vcp = &VnodeClassInfo[vSmall];
Log("Small vnode cache,%d entries, %d allocs, %d gets (%d reads), %d writes\n", vcp->cacheSize, vcp->allocs, vcp->gets, vcp->reads, vcp->writes);
- SplitInt64(VStats.hdr_gets, get_hi, get_lo);
- SplitInt64(VStats.hdr_loads, load_hi, load_lo);
- Log("Volume header cache, %d entries, %d gets, %d replacements\n",
- VStats.hdr_cache_size, get_lo, load_lo);
+ Log("Volume header cache, %d entries, %"AFS_INT64_FMT" gets, "
+ "%"AFS_INT64_FMT" replacements\n",
+ VStats.hdr_cache_size, VStats.hdr_gets, VStats.hdr_loads);
}
void
struct VLRUExtStatsEntry * vec;
};
-/**
+/**
* add a 256-entry fudge factor onto the vector in case state changes
* out from under us.
*/
#define ENUMTOSTRING(en) #en
#define ENUMCASE(en) \
- case en: \
- return ENUMTOSTRING(en); \
- break
+ case en: return ENUMTOSTRING(en)
static char *
vlru_idx_to_string(int idx)
reorders.sum += ch_reorders.sum;
len.sum += (double)head->len;
vol_sum += head->len;
-
+
if (i == 0) {
len.min = (double) head->len;
len.max = (double) head->len;
/* dump per-chain stats */
Log("Volume hash chain %d : len=%d, looks=%s, reorders=%s\n",
- i, head->len,
+ i, head->len,
DoubleToPrintable(ch_looks.sum, pr_buf[0], sizeof(pr_buf[0])),
DoubleToPrintable(ch_reorders.sum, pr_buf[1], sizeof(pr_buf[1])));
Log("\tVolume gets : min=%s, max=%s, avg=%s, total=%s\n",
} else if (flags & VOL_STATS_PER_CHAIN) {
/* dump simple per-chain stats */
Log("Volume hash chain %d : len=%d, looks=%s, gets=%s, reorders=%s\n",
- i, head->len,
+ i, head->len,
DoubleToPrintable(ch_looks.sum, pr_buf[0], sizeof(pr_buf[0])),
DoubleToPrintable(ch_gets.sum, pr_buf[1], sizeof(pr_buf[1])),
DoubleToPrintable(ch_reorders.sum, pr_buf[2], sizeof(pr_buf[2])));
* of the VGetPartitionById_r interface contract. */
diskP = VGetPartitionById_r(i, 0);
if (diskP) {
- Log("Partition %s has %d online volumes\n",
+ Log("Partition %s has %d online volumes\n",
VPartitionPath(diskP), diskP->vol_list.len);
}
}
/* print extended VLRU statistics */
if (VVLRUExtStats_r(&vlru_stats, vol_sum) == 0) {
afs_uint32 idx, cur, lpos;
- VOL_UNLOCK;
VolumeId line[5];
+ VOL_UNLOCK;
+
Log("VLRU State Dump:\n\n");
for (idx = VLRU_QUEUE_NEW; idx < VLRU_QUEUE_INVALID; idx++) {
{
return vol_opts.canUseSALVSYNC;
}
+
+afs_int32
+VCanUnsafeAttach(void)
+{
+ return vol_opts.unsafe_attach;
+}