#include <afsconfig.h>
#include <afs/param.h>
+#include <roken.h>
+#include <afs/opr.h>
-#include <rx/xdr.h>
-#include <afs/afsint.h>
#include <ctype.h>
-#include <signal.h>
+#include <stddef.h>
+
+#ifdef HAVE_SYS_FILE_H
+#include <sys/file.h>
+#endif
+
+#ifdef AFS_PTHREAD_ENV
+# include <opr/lock.h>
+#else
+# include <opr/lockstub.h>
+#endif
+#include <opr/ffs.h>
+#include <opr/jhash.h>
+
+#include <afs/afsint.h>
+
+#include <rx/rx_queue.h>
+
#ifndef AFS_NT40_ENV
-#include <sys/param.h>
#if !defined(AFS_SGI_ENV)
#ifdef AFS_OSF_ENV
#include <ufs/fs.h>
#endif
#endif
#else /* AFS_VFSINCL_ENV */
-#if !defined(AFS_AIX_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
+#if !defined(AFS_AIX_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
#include <sys/fs.h>
#endif
#endif /* AFS_VFSINCL_ENV */
#endif /* AFS_OSF_ENV */
#endif /* AFS_SGI_ENV */
-#endif /* AFS_NT40_ENV */
-#include <errno.h>
-#include <sys/stat.h>
-#include <stdio.h>
-#ifdef AFS_NT40_ENV
-#include <fcntl.h>
-#else
-#include <sys/file.h>
-#endif
-#include <dirent.h>
+#endif /* !AFS_NT40_ENV */
+
#ifdef AFS_AIX_ENV
#include <sys/vfs.h>
-#include <fcntl.h>
#else
#ifdef AFS_HPUX_ENV
-#include <fcntl.h>
#include <mntent.h>
#else
#if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
#else
#ifndef AFS_NT40_ENV
#if defined(AFS_SGI_ENV)
-#include <fcntl.h>
#include <mntent.h>
-
#else
#ifndef AFS_LINUX20_ENV
#include <fstab.h> /* Need to find in libc 5, present in libc 6 */
#endif
#endif /* AFS_HPUX_ENV */
#endif
-#ifndef AFS_NT40_ENV
-#include <netdb.h>
-#include <netinet/in.h>
-#include <sys/wait.h>
-#include <setjmp.h>
-#ifndef ITIMER_REAL
-#include <sys/time.h>
-#endif /* ITIMER_REAL */
-#endif /* AFS_NT40_ENV */
-#if defined(AFS_SUN5_ENV) || defined(AFS_NT40_ENV) || defined(AFS_LINUX20_ENV)
-#include <string.h>
-#else
-#include <strings.h>
-#endif
#include "nfs.h"
#include <afs/errors.h>
#include <afs/afssyscalls.h>
#include "ihandle.h"
#include <afs/afsutil.h>
-#ifdef AFS_NT40_ENV
-#include <io.h>
-#endif
#include "daemon_com.h"
#include "fssync.h"
#include "salvsync.h"
#include "partition.h"
#include "volume_inline.h"
#include "common.h"
-#include "afs/afs_assert.h"
#include "vutils.h"
-#ifndef AFS_NT40_ENV
#include <afs/dir.h>
-#include <unistd.h>
-#endif
-
-#if !defined(offsetof)
-#include <stddef.h>
-#endif
-
-#ifdef O_LARGEFILE
-#define afs_stat stat64
-#define afs_fstat fstat64
-#define afs_open open64
-#else /* !O_LARGEFILE */
-#define afs_stat stat
-#define afs_fstat fstat
-#define afs_open open
-#endif /* !O_LARGEFILE */
#ifdef AFS_PTHREAD_ENV
pthread_mutex_t vol_glock_mutex;
static volatile sig_atomic_t vol_disallow_salvsync = 0;
#endif /* AFS_DEMAND_ATTACH_FS */
+/**
+ * has VShutdown_r been called / is VShutdown_r running?
+ */
+static int vol_shutting_down = 0;
+
#ifdef AFS_OSF_ENV
extern void *calloc(), *realloc();
#endif
/* Forward declarations */
-static Volume *attach2(Error * ec, VolId volumeId, char *path,
+static Volume *attach2(Error * ec, VolumeId volumeId, char *path,
struct DiskPartition64 *partp, Volume * vp,
- int isbusy, int mode);
+ int isbusy, int mode, int *acheckedOut);
static void ReallyFreeVolume(Volume * vp);
#ifdef AFS_DEMAND_ATTACH_FS
static void FreeVolume(Volume * vp);
static int GetVolumeHeader(Volume * vp);
static void ReleaseVolumeHeader(struct volHeader *hd);
static void FreeVolumeHeader(Volume * vp);
-static void AddVolumeToHashTable(Volume * vp, int hashid);
+static void AddVolumeToHashTable(Volume * vp, VolumeId hashid);
static void DeleteVolumeFromHashTable(Volume * vp);
#if 0
static int VHold(Volume * vp);
static void LoadVolumeHeader(Error * ec, Volume * vp);
static int VCheckOffline(Volume * vp);
static int VCheckDetach(Volume * vp);
-static Volume * GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int flags);
+static Volume * GetVolume(Error * ec, Error * client_ec, VolumeId volumeId,
+ Volume * hint, const struct timespec *ts);
-int LogLevel; /* Vice loglevel--not defined as extern so that it will be
- * defined when not linked with vice, XXXX */
ProgramType programType; /* The type of program using the package */
static VolumePackageOptions vol_opts;
#endif
-#define VOLUME_BITMAP_GROWSIZE 16 /* bytes, => 128vnodes */
- /* Must be a multiple of 4 (1 word) !! */
-
/* this parameter needs to be tunable at runtime.
* 128 was really inadequate for largish servers -- at 16384 volumes this
* puts average chain length at 128, thus an average 65 deref's to find a volptr.
* an AVL or splay tree might work a lot better, but we'll just increase
* the default hash table size for now
*/
-#define DEFAULT_VOLUME_HASH_SIZE 256 /* Must be a power of 2!! */
-#define DEFAULT_VOLUME_HASH_MASK (DEFAULT_VOLUME_HASH_SIZE-1)
-#define VOLUME_HASH(volumeId) (volumeId&(VolumeHashTable.Mask))
+#define DEFAULT_VOLUME_HASH_BITS 10
+#define DEFAULT_VOLUME_HASH_SIZE opr_jhash_size(DEFAULT_VOLUME_HASH_BITS)
+#define DEFAULT_VOLUME_HASH_MASK opr_jhash_mask(DEFAULT_VOLUME_HASH_BITS)
+#define VOLUME_HASH(volumeId) \
+ (opr_jhash_int(volumeId, 0) & VolumeHashTable.Mask)
/*
* turn volume hash chains into partially ordered lists.
*/
#define VOLUME_HASH_REORDER_CHAIN_THRESH (VOLUME_HASH_REORDER_THRESHOLD / 2)
+/*
+ * The per volume uniquifier is bumped by 200 and written to disk
+ * every 200 file creates.
+ */
+#define VOLUME_UPDATE_UNIQUIFIER_BUMP 200
+
#include "rx/rx_queue.h"
static void VInitVolumeHash(void);
-#ifndef AFS_HAVE_FFS
-/* This macro is used where an ffs() call does not exist. Was in util/ffs.c */
-ffs(x)
-{
- afs_int32 ffs_i;
- afs_int32 ffs_tmp = x;
- if (ffs_tmp == 0)
- return (-1);
- else
- for (ffs_i = 1;; ffs_i++) {
- if (ffs_tmp & 1)
- return (ffs_i);
- else
- ffs_tmp >>= 1;
- }
-}
-#endif /* !AFS_HAVE_FFS */
-
#ifdef AFS_PTHREAD_ENV
/**
* disk partition queue element
static void *VInitVolumePackageThread(void *args);
static struct DiskPartition64 *VInitNextPartition(struct partition_queue *pq);
-static VolId VInitNextVolumeId(DIR *dirp);
+static VolumeId VInitNextVolumeId(DIR *dirp);
static int VInitPreAttachVolumes(int nthreads, struct volume_init_queue *vq);
#endif /* !AFS_DEMAND_ATTACH_FS */
};
struct VLRU_DiskEntry {
- afs_uint32 vid; /* volume ID */
+ VolumeId vid; /* volume ID */
afs_uint32 idx; /* generation */
afs_uint32 last_get; /* timestamp of last get */
};
static void VVByPListWait_r(struct DiskPartition64 * dp);
/* online salvager */
+typedef enum {
+ VCHECK_SALVAGE_OK = 0, /**< no pending salvage */
+ VCHECK_SALVAGE_SCHEDULED = 1, /**< salvage has been scheduled */
+ VCHECK_SALVAGE_ASYNC = 2, /**< salvage being scheduled */
+ VCHECK_SALVAGE_DENIED = 3, /**< salvage not scheduled; denied */
+ VCHECK_SALVAGE_FAIL = 4 /**< salvage not scheduled; failed */
+} vsalvage_check;
static int VCheckSalvage(Volume * vp);
#if defined(SALVSYNC_BUILD_CLIENT) || defined(FSSYNC_BUILD_CLIENT)
static int VScheduleSalvage_r(Volume * vp);
opts->canUseFSSYNC = 0;
opts->canUseSALVSYNC = 0;
+ opts->interrupt_rxcall = NULL;
+ opts->offline_timeout = -1;
+ opts->offline_shutdown_timeout = -1;
+ opts->usage_threshold = 128;
+ opts->usage_rate_limit = 5;
+
#ifdef FAST_RESTART
opts->unsafe_attach = 1;
#else /* !FAST_RESTART */
VSetVInit_r(int value)
{
VInit = value;
- CV_BROADCAST(&vol_vinit_cond);
+ opr_cv_broadcast(&vol_vinit_cond);
+}
+
+static_inline void
+VLogOfflineTimeout(const char *type, afs_int32 timeout)
+{
+ if (timeout < 0) {
+ return;
+ }
+ if (timeout == 0) {
+ Log("VInitVolumePackage: Interrupting clients accessing %s "
+ "immediately\n", type);
+ } else {
+ Log("VInitVolumePackage: Interrupting clients accessing %s "
+ "after %ld second%s\n", type, (long)timeout, timeout==1?"":"s");
+ }
}
int
programType = pt;
vol_opts = *opts;
+#ifndef AFS_PTHREAD_ENV
+ if (opts->offline_timeout != -1 || opts->offline_shutdown_timeout != -1) {
+ Log("VInitVolumePackage: offline_timeout and/or "
+ "offline_shutdown_timeout was specified, but the volume package "
+ "does not support these for LWP builds\n");
+ return -1;
+ }
+#endif
+ VLogOfflineTimeout("volumes going offline", opts->offline_timeout);
+ VLogOfflineTimeout("volumes going offline during shutdown",
+ opts->offline_shutdown_timeout);
+
memset(&VStats, 0, sizeof(VStats));
VStats.hdr_cache_size = 200;
} else {
VLRU_SetOptions(VLRU_SET_ENABLED, 0);
}
- osi_Assert(pthread_key_create(&VThread_key, NULL) == 0);
+ opr_Verify(pthread_key_create(&VThread_key, NULL) == 0);
#endif
- MUTEX_INIT(&vol_glock_mutex, "vol glock", MUTEX_DEFAULT, 0);
- MUTEX_INIT(&vol_trans_mutex, "vol trans", MUTEX_DEFAULT, 0);
- CV_INIT(&vol_put_volume_cond, "vol put", CV_DEFAULT, 0);
- CV_INIT(&vol_sleep_cond, "vol sleep", CV_DEFAULT, 0);
- CV_INIT(&vol_init_attach_cond, "vol init attach", CV_DEFAULT, 0);
- CV_INIT(&vol_vinit_cond, "vol init", CV_DEFAULT, 0);
+ opr_mutex_init(&vol_glock_mutex);
+ opr_mutex_init(&vol_trans_mutex);
+ opr_cv_init(&vol_put_volume_cond);
+ opr_cv_init(&vol_sleep_cond);
+ opr_cv_init(&vol_init_attach_cond);
+ opr_cv_init(&vol_vinit_cond);
#ifndef AFS_PTHREAD_ENV
IOMGR_Initialize();
#endif /* AFS_PTHREAD_ENV */
srandom(time(0)); /* For VGetVolumeInfo */
#ifdef AFS_DEMAND_ATTACH_FS
- MUTEX_INIT(&vol_salvsync_mutex, "salvsync", MUTEX_DEFAULT, 0);
+ opr_mutex_init(&vol_salvsync_mutex);
#endif /* AFS_DEMAND_ATTACH_FS */
/* Ok, we have done enough initialization that fileserver can
#if defined(AFS_DEMAND_ATTACH_FS) && defined(SALVSYNC_BUILD_CLIENT)
if (VCanUseSALVSYNC()) {
/* establish a connection to the salvager at this point */
- osi_Assert(VConnectSALV() != 0);
+ opr_Verify(VConnectSALV() != 0);
}
#endif /* AFS_DEMAND_ATTACH_FS */
int
VInitAttachVolumes(ProgramType pt)
{
- osi_Assert(VInit==1);
+ opr_Assert(VInit==1);
if (pt == fileServer) {
struct DiskPartition64 *diskP;
/* Attach all the volumes in this partition */
for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
int nAttached = 0, nUnattached = 0;
- osi_Assert(VAttachVolumesByPartition(diskP, &nAttached, &nUnattached) == 0);
+ opr_Verify(VAttachVolumesByPartition(diskP,
+ &nAttached, &nUnattached)
+ == 0);
}
}
VOL_LOCK;
int
VInitAttachVolumes(ProgramType pt)
{
- osi_Assert(VInit==1);
+ opr_Assert(VInit==1);
if (pt == fileServer) {
struct DiskPartition64 *diskP;
struct vinitvolumepackage_thread_t params;
pthread_t tid;
pthread_attr_t attrs;
- CV_INIT(¶ms.thread_done_cv, "thread done", CV_DEFAULT, 0);
+ opr_cv_init(¶ms.thread_done_cv);
queue_Init(¶ms);
params.n_threads_complete = 0;
/* create partition work queue */
for (parts=0, diskP = DiskPartitionList; diskP; diskP = diskP->next, parts++) {
- dpq = (diskpartition_queue_t *) malloc(sizeof(struct diskpartition_queue_t));
- osi_Assert(dpq != NULL);
+ dpq = malloc(sizeof(struct diskpartition_queue_t));
+ opr_Assert(dpq != NULL);
dpq->diskP = diskP;
queue_Append(¶ms,dpq);
}
- threads = MIN(parts, vol_attach_threads);
+ threads = min(parts, vol_attach_threads);
if (threads > 1) {
/* spawn off a bunch of initialization threads */
- osi_Assert(pthread_attr_init(&attrs) == 0);
- osi_Assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
+ opr_Verify(pthread_attr_init(&attrs) == 0);
+ opr_Verify(pthread_attr_setdetachstate(&attrs,
+ PTHREAD_CREATE_DETACHED)
+ == 0);
Log("VInitVolumePackage: beginning parallel fileserver startup\n");
Log("VInitVolumePackage: using %d threads to attach volumes on %d partitions\n",
for (i=0; i < threads; i++) {
AFS_SIGSET_DECL;
AFS_SIGSET_CLEAR();
- osi_Assert(pthread_create
- (&tid, &attrs, &VInitVolumePackageThread,
- ¶ms) == 0);
+ opr_Verify(pthread_create(&tid, &attrs,
+ &VInitVolumePackageThread,
+ ¶ms) == 0);
AFS_SIGSET_RESTORE();
}
}
VOL_UNLOCK;
- osi_Assert(pthread_attr_destroy(&attrs) == 0);
+ opr_Verify(pthread_attr_destroy(&attrs) == 0);
} else {
/* if we're only going to run one init thread, don't bother creating
* another LWP */
VInitVolumePackageThread(¶ms);
}
- CV_DESTROY(¶ms.thread_done_cv);
+ opr_cv_destroy(¶ms.thread_done_cv);
}
VOL_LOCK;
VSetVInit_r(2); /* Initialized, and all volumes have been attached */
- CV_BROADCAST(&vol_init_attach_cond);
+ opr_cv_broadcast(&vol_init_attach_cond);
VOL_UNLOCK;
return 0;
}
diskP = dpq->diskP;
free(dpq);
- osi_Assert(VAttachVolumesByPartition(diskP, &nAttached, &nUnattached) == 0);
+ opr_Verify(VAttachVolumesByPartition(diskP, &nAttached,
+ &nUnattached) == 0);
VOL_LOCK;
}
done:
params->n_threads_complete++;
- CV_SIGNAL(¶ms->thread_done_cv);
+ opr_cv_signal(¶ms->thread_done_cv);
VOL_UNLOCK;
return NULL;
}
int
VInitAttachVolumes(ProgramType pt)
{
- osi_Assert(VInit==1);
+ opr_Assert(VInit==1);
if (pt == fileServer) {
struct DiskPartition64 *diskP;
/* create partition work queue */
queue_Init(&pq);
- CV_INIT(&(pq.cv), "partq", CV_DEFAULT, 0);
- MUTEX_INIT(&(pq.mutex), "partq", MUTEX_DEFAULT, 0);
+ opr_cv_init(&pq.cv);
+ opr_mutex_init(&pq.mutex);
for (parts = 0, diskP = DiskPartitionList; diskP; diskP = diskP->next, parts++) {
struct diskpartition_queue_t *dp;
- dp = (struct diskpartition_queue_t*)malloc(sizeof(struct diskpartition_queue_t));
- osi_Assert(dp != NULL);
+ dp = malloc(sizeof(struct diskpartition_queue_t));
+ opr_Assert(dp != NULL);
dp->diskP = diskP;
queue_Append(&pq, dp);
}
/* number of worker threads; at least one, not to exceed the number of partitions */
- threads = MIN(parts, vol_attach_threads);
+ threads = min(parts, vol_attach_threads);
/* create volume work queue */
queue_Init(&vq);
- CV_INIT(&(vq.cv), "volq", CV_DEFAULT, 0);
- MUTEX_INIT(&(vq.mutex), "volq", MUTEX_DEFAULT, 0);
+ opr_cv_init(&vq.cv);
+ opr_mutex_init(&vq.mutex);
- osi_Assert(pthread_attr_init(&attrs) == 0);
- osi_Assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
+ opr_Verify(pthread_attr_init(&attrs) == 0);
+ opr_Verify(pthread_attr_setdetachstate(&attrs,
+ PTHREAD_CREATE_DETACHED) == 0);
Log("VInitVolumePackage: beginning parallel fileserver startup\n");
Log("VInitVolumePackage: using %d threads to pre-attach volumes on %d partitions\n",
struct vinitvolumepackage_thread_param *params;
AFS_SIGSET_DECL;
- params = (struct vinitvolumepackage_thread_param *)malloc(sizeof(struct vinitvolumepackage_thread_param));
- osi_Assert(params);
+ params = malloc(sizeof(struct vinitvolumepackage_thread_param));
+ opr_Assert(params);
params->pq = &pq;
params->vq = &vq;
params->nthreads = threads;
params->thread = i+1;
AFS_SIGSET_CLEAR();
- osi_Assert(pthread_create (&tid, &attrs, &VInitVolumePackageThread, (void*)params) == 0);
+ opr_Verify(pthread_create(&tid, &attrs,
+ &VInitVolumePackageThread,
+ (void*)params) == 0);
AFS_SIGSET_RESTORE();
}
VInitPreAttachVolumes(threads, &vq);
- osi_Assert(pthread_attr_destroy(&attrs) == 0);
- CV_DESTROY(&pq.cv);
- MUTEX_DESTROY(&pq.mutex);
- CV_DESTROY(&vq.cv);
- MUTEX_DESTROY(&vq.mutex);
+ opr_Verify(pthread_attr_destroy(&attrs) == 0);
+ opr_cv_destroy(&pq.cv);
+ opr_mutex_destroy(&pq.mutex);
+ opr_cv_destroy(&vq.cv);
+ opr_mutex_destroy(&vq.mutex);
}
VOL_LOCK;
VSetVInit_r(2); /* Initialized, and all volumes have been attached */
- CV_BROADCAST(&vol_init_attach_cond);
+ opr_cv_broadcast(&vol_init_attach_cond);
VOL_UNLOCK;
return 0;
struct volume_init_queue *vq;
struct volume_init_batch *vb;
- osi_Assert(args);
+ opr_Assert(args);
params = (struct vinitvolumepackage_thread_param *)args;
pq = params->pq;
vq = params->vq;
- osi_Assert(pq);
- osi_Assert(vq);
+ opr_Assert(pq);
+ opr_Assert(vq);
- vb = (struct volume_init_batch*)malloc(sizeof(struct volume_init_batch));
- osi_Assert(vb);
+ vb = malloc(sizeof(struct volume_init_batch));
+ opr_Assert(vb);
vb->thread = params->thread;
vb->last = 0;
vb->size = 0;
Log("Scanning partitions on thread %d of %d\n", params->thread, params->nthreads);
while((partition = VInitNextPartition(pq))) {
DIR *dirp;
- VolId vid;
+ VolumeId vid;
Log("Partition %s: pre-attaching volumes\n", partition->name);
dirp = opendir(VPartitionPath(partition));
continue;
}
while ((vid = VInitNextVolumeId(dirp))) {
- Volume *vp = (Volume*)malloc(sizeof(Volume));
- osi_Assert(vp);
- memset(vp, 0, sizeof(Volume));
+ Volume *vp = calloc(1, sizeof(Volume));
+ opr_Assert(vp);
vp->device = partition->device;
vp->partition = partition;
vp->hashid = vid;
queue_Init(&vp->vnode_list);
- CV_INIT(&V_attachCV(vp), "partattach", CV_DEFAULT, 0);
+ queue_Init(&vp->rx_call_list);
+ opr_cv_init(&V_attachCV(vp));
vb->batch[vb->size++] = vp;
if (vb->size == VINIT_BATCH_MAX_SIZE) {
- MUTEX_ENTER(&vq->mutex);
+ opr_mutex_enter(&vq->mutex);
queue_Append(vq, vb);
- CV_BROADCAST(&vq->cv);
- MUTEX_EXIT(&vq->mutex);
+ opr_cv_broadcast(&vq->cv);
+ opr_mutex_exit(&vq->mutex);
- vb = (struct volume_init_batch*)malloc(sizeof(struct volume_init_batch));
- osi_Assert(vb);
+ vb = malloc(sizeof(struct volume_init_batch));
+ opr_Assert(vb);
vb->thread = params->thread;
vb->size = 0;
vb->last = 0;
}
vb->last = 1;
- MUTEX_ENTER(&vq->mutex);
+ opr_mutex_enter(&vq->mutex);
queue_Append(vq, vb);
- CV_BROADCAST(&vq->cv);
- MUTEX_EXIT(&vq->mutex);
+ opr_cv_broadcast(&vq->cv);
+ opr_mutex_exit(&vq->mutex);
Log("Partition scan thread %d of %d ended\n", params->thread, params->nthreads);
free(params);
}
/* get next partition to scan */
- MUTEX_ENTER(&pq->mutex);
+ opr_mutex_enter(&pq->mutex);
if (queue_IsEmpty(pq)) {
- MUTEX_EXIT(&pq->mutex);
+ opr_mutex_exit(&pq->mutex);
return NULL;
}
dp = queue_First(pq, diskpartition_queue_t);
queue_Remove(dp);
- MUTEX_EXIT(&pq->mutex);
+ opr_mutex_exit(&pq->mutex);
- osi_Assert(dp);
- osi_Assert(dp->diskP);
+ opr_Assert(dp);
+ opr_Assert(dp->diskP);
partition = dp->diskP;
free(dp);
/**
* Find next volume id on the partition.
*/
-static VolId
+static VolumeId
VInitNextVolumeId(DIR *dirp)
{
struct dirent *d;
- VolId vid = 0;
+ VolumeId vid = 0;
char *ext;
while((d = readdir(dirp))) {
while (nthreads) {
/* dequeue next volume */
- MUTEX_ENTER(&vq->mutex);
+ opr_mutex_enter(&vq->mutex);
if (queue_IsEmpty(vq)) {
- CV_WAIT(&vq->cv, &vq->mutex);
+ opr_cv_wait(&vq->cv, &vq->mutex);
}
vb = queue_First(vq, volume_init_batch);
queue_Remove(vb);
- MUTEX_EXIT(&vq->mutex);
+ opr_mutex_exit(&vq->mutex);
if (vb->size) {
VOL_LOCK;
Log("Error looking up volume, code=%d\n", ec);
}
else if (dup) {
- Log("Warning: Duplicate volume id %d detected.\n", vp->hashid);
+ Log("Warning: Duplicate volume id %" AFS_VOLID_FMT " detected.\n", afs_printable_VolumeId_lu(vp->hashid));
}
else {
/* put pre-attached volume onto the hash table
(*(vp ? nAttached : nUnattached))++;
if (error == VOFFLINE)
Log("Volume %d stays offline (/vice/offline/%s exists)\n", VolumeNumber(dp->d_name), dp->d_name);
- else if (LogLevel >= 5) {
+ else if (GetLogLevel() >= 5) {
Log("Partition %s: attached volume %d (%s)\n",
diskP->name, VolumeNumber(dp->d_name),
dp->d_name);
Log("VShutdown: shutting down on-line volumes on %d partition%s...\n",
params.n_parts, params.n_parts > 1 ? "s" : "");
+ vol_shutting_down = 1;
+
if (vol_attach_threads > 1) {
/* prepare for parallel shutdown */
params.n_threads = vol_attach_threads;
- MUTEX_INIT(¶ms.lock, "params", MUTEX_DEFAULT, 0);
- CV_INIT(¶ms.cv, "params", CV_DEFAULT, 0);
- CV_INIT(¶ms.master_cv, "params master", CV_DEFAULT, 0);
- osi_Assert(pthread_attr_init(&attrs) == 0);
- osi_Assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
+ opr_mutex_init(¶ms.lock);
+ opr_cv_init(¶ms.cv);
+ opr_cv_init(¶ms.master_cv);
+ opr_Verify(pthread_attr_init(&attrs) == 0);
+ opr_Verify(pthread_attr_setdetachstate(&attrs,
+ PTHREAD_CREATE_DETACHED) == 0);
queue_Init(¶ms);
/* setup the basic partition information structures for
/* build up the pass 0 shutdown work queue */
- dpq = (struct diskpartition_queue_t *) malloc(sizeof(struct diskpartition_queue_t));
- osi_Assert(dpq != NULL);
+ dpq = malloc(sizeof(struct diskpartition_queue_t));
+ opr_Assert(dpq != NULL);
dpq->diskP = diskP;
queue_Prepend(¶ms, dpq);
vol_attach_threads, params.n_parts, params.n_parts > 1 ? "s" : "" );
/* do pass 0 shutdown */
- MUTEX_ENTER(¶ms.lock);
+ opr_mutex_enter(¶ms.lock);
for (i=0; i < params.n_threads; i++) {
- osi_Assert(pthread_create
- (&tid, &attrs, &VShutdownThread,
- ¶ms) == 0);
+ opr_Verify(pthread_create(&tid, &attrs, &VShutdownThread,
+ ¶ms) == 0);
}
/* wait for all the pass 0 shutdowns to complete */
}
params.n_threads_complete = 0;
params.pass = 1;
- CV_BROADCAST(¶ms.cv);
- MUTEX_EXIT(¶ms.lock);
+ opr_cv_broadcast(¶ms.cv);
+ opr_mutex_exit(¶ms.lock);
Log("VShutdown: pass 0 completed using the 1 thread per partition algorithm\n");
Log("VShutdown: starting passes 1 through 3 using finely-granular mp-fast algorithm\n");
VOL_CV_WAIT(¶ms.cv);
}
- osi_Assert(pthread_attr_destroy(&attrs) == 0);
- CV_DESTROY(¶ms.cv);
- CV_DESTROY(¶ms.master_cv);
- MUTEX_DESTROY(¶ms.lock);
+ opr_Verify(pthread_attr_destroy(&attrs) == 0);
+ opr_cv_destroy(¶ms.cv);
+ opr_cv_destroy(¶ms.master_cv);
+ opr_mutex_destroy(¶ms.lock);
/* drop the VByPList exclusive reservations */
for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
}
Log("VShutdown: shutting down on-line volumes...\n");
+ vol_shutting_down = 1;
for (i = 0; i < VolumeHashTable.Size; i++) {
/* try to hold first volume in the hash table */
for (queue_Scan(&VolumeHashTable.Table[i],vp,np,Volume)) {
code = VHold_r(vp);
if (code == 0) {
- if (LogLevel >= 5)
- Log("VShutdown: Attempting to take volume %u offline.\n",
- vp->hashid);
+ if (GetLogLevel() >= 5)
+ Log("VShutdown: Attempting to take volume %" AFS_VOLID_FMT " offline.\n",
+ afs_printable_VolumeId_lu(vp->hashid));
/* next, take the volume offline (drops reference count) */
VOffline_r(vp, "File server was shut down");
void
VShutdown(void)
{
- osi_Assert(VInit>0);
+ opr_Assert(VInit>0);
VOL_LOCK;
VShutdown_r();
VOL_UNLOCK;
params = (vshutdown_thread_t *) args;
/* acquire the shutdown pass 0 lock */
- MUTEX_ENTER(¶ms->lock);
+ opr_mutex_enter(¶ms->lock);
/* if there's still pass 0 work to be done,
* get a work entry, and do a pass 0 shutdown */
if (queue_IsNotEmpty(params)) {
dpq = queue_First(params, diskpartition_queue_t);
queue_Remove(dpq);
- MUTEX_EXIT(¶ms->lock);
+ opr_mutex_exit(¶ms->lock);
diskP = dpq->diskP;
free(dpq);
id = diskP->index;
while (ShutdownVolumeWalk_r(diskP, 0, ¶ms->part_pass_head[id]))
count++;
params->stats[0][diskP->index] = count;
- MUTEX_ENTER(¶ms->lock);
+ opr_mutex_enter(¶ms->lock);
}
params->n_threads_complete++;
if (params->n_threads_complete == params->n_threads) {
/* notify control thread that all workers have completed pass 0 */
- CV_SIGNAL(¶ms->master_cv);
+ opr_cv_signal(¶ms->master_cv);
}
while (params->pass == 0) {
- CV_WAIT(¶ms->cv, ¶ms->lock);
+ opr_cv_wait(¶ms->cv, ¶ms->lock);
}
/* switch locks */
- MUTEX_EXIT(¶ms->lock);
+ opr_mutex_exit(¶ms->lock);
VOL_LOCK;
pass = params->pass;
- osi_Assert(pass > 0);
+ opr_Assert(pass > 0);
/* now escalate through the more complicated shutdowns */
while (pass <= 3) {
ShutdownCreateSchedule(params);
/* wake up all the workers */
- CV_BROADCAST(¶ms->cv);
+ opr_cv_broadcast(¶ms->cv);
VOL_UNLOCK;
Log("VShutdown: pass %d completed using %d threads on %d partitions\n",
{
struct rx_queue * q = queue_First(&dp->vol_list, rx_queue);
int i = 0;
+ const char *pass_strs[4] = {"{un/pre}attached vols", "vols w/ vol header loaded", "vols w/o vol header loaded", "vols with exclusive state"};
- while (ShutdownVolumeWalk_r(dp, pass, &q))
+ while (ShutdownVolumeWalk_r(dp, pass, &q)) {
i++;
+ if (0 == i%100) {
+ Log("VShutdownByPartition: ... shut down %d volumes on %s in pass %d (%s)\n", i, VPartitionPath(dp), pass, pass_strs[pass]);
+ }
+ }
return i;
}
VCreateReservation_r(vp);
- if (LogLevel >= 5) {
- Log("VShutdownVolume_r: vid=%u, device=%d, state=%hu\n",
- vp->hashid, vp->partition->device, V_attachState(vp));
+ if (GetLogLevel() >= 5) {
+ Log("VShutdownVolume_r: vid=%" AFS_VOLID_FMT ", device=%d, state=%u\n",
+ afs_printable_VolumeId_lu(vp->hashid), vp->partition->device,
+ (unsigned int) V_attachState(vp));
}
/* wait for other blocking ops to finish */
VWaitExclusiveState_r(vp);
- osi_Assert(VIsValidState(V_attachState(vp)));
+ opr_Assert(VIsValidState(V_attachState(vp)));
switch(V_attachState(vp)) {
case VOL_STATE_SALVAGING:
case VOL_STATE_ATTACHED:
code = VHold_r(vp);
if (!code) {
- if (LogLevel >= 5)
- Log("VShutdown: Attempting to take volume %u offline.\n",
- vp->hashid);
+ if (GetLogLevel() >= 5)
+ Log("VShutdown: Attempting to take volume %" AFS_VOLID_FMT " offline.\n",
+ afs_printable_VolumeId_lu(vp->hashid));
/* take the volume offline (drops reference count) */
VOffline_r(vp, "File server was shut down");
/* Header I/O routines */
/***************************************************/
+static const char *
+HeaderName(bit32 magic)
+{
+ switch (magic) {
+ case VOLUMEINFOMAGIC:
+ return "volume info";
+ case SMALLINDEXMAGIC:
+ return "small index";
+ case LARGEINDEXMAGIC:
+ return "large index";
+ case LINKTABLEMAGIC:
+ return "link table";
+ }
+ return "unknown";
+}
+
/* open a descriptor for the inode (h),
* read in an on-disk structure into buffer (to) of size (size),
* verify versionstamp in structure has magic (magic) and
{
struct versionStamp *vsn;
FdHandle_t *fdP;
+ afs_sfsize_t nbytes;
+ afs_ino_str_t stmp;
*ec = 0;
if (h == NULL) {
+ Log("ReadHeader: Null inode handle argument for %s header file.\n",
+ HeaderName(magic));
*ec = VSALVAGE;
return;
}
fdP = IH_OPEN(h);
if (fdP == NULL) {
+ Log("ReadHeader: Failed to open %s header file "
+ "(volume=%" AFS_VOLID_FMT ", inode=%s); errno=%d\n", HeaderName(magic), afs_printable_VolumeId_lu(h->ih_vid),
+ PrintInode(stmp, h->ih_ino), errno);
*ec = VSALVAGE;
return;
}
vsn = (struct versionStamp *)to;
- if (FDH_PREAD(fdP, to, size, 0) != size || vsn->magic != magic) {
+ nbytes = FDH_PREAD(fdP, to, size, 0);
+ if (nbytes < 0) {
+ Log("ReadHeader: Failed to read %s header file "
+ "(volume=%" AFS_VOLID_FMT ", inode=%s); errno=%d\n", HeaderName(magic), afs_printable_VolumeId_lu(h->ih_vid),
+ PrintInode(stmp, h->ih_ino), errno);
*ec = VSALVAGE;
FDH_REALLYCLOSE(fdP);
return;
}
+ if (nbytes != size) {
+ Log("ReadHeader: Incorrect number of bytes read from %s header file "
+ "(volume=%" AFS_VOLID_FMT ", inode=%s); expected=%d, read=%d\n",
+ HeaderName(magic), afs_printable_VolumeId_lu(h->ih_vid),
+ PrintInode(stmp, h->ih_ino), size, (int)nbytes);
+ *ec = VSALVAGE;
+ FDH_REALLYCLOSE(fdP);
+ return;
+ }
+ if (vsn->magic != magic) {
+ Log("ReadHeader: Incorrect magic for %s header file "
+ "(volume=%" AFS_VOLID_FMT ", inode=%s); expected=0x%x, read=0x%x\n",
+ HeaderName(magic), afs_printable_VolumeId_lu(h->ih_vid),
+ PrintInode(stmp, h->ih_ino), magic, vsn->magic);
+ *ec = VSALVAGE;
+ FDH_REALLYCLOSE(fdP);
+ return;
+ }
+
FDH_CLOSE(fdP);
/* Check is conditional, in case caller wants to inspect version himself */
if (version && vsn->version != version) {
+ Log("ReadHeader: Incorrect version for %s header file "
+ "(volume=%" AFS_VOLID_FMT ", inode=%s); expected=%x, read=%x\n",
+ HeaderName(magic), afs_printable_VolumeId_lu(h->ih_vid), PrintInode(stmp, h->ih_ino),
+ version, vsn->version);
*ec = VSALVAGE;
}
}
Volume *
VPreAttachVolumeById_r(Error * ec,
char * partition,
- VolId volumeId)
+ VolumeId volumeId)
{
Volume *vp;
struct DiskPartition64 *partp;
*ec = 0;
- osi_Assert(programType == fileServer);
+ opr_Assert(programType == fileServer);
if (!(partp = VGetPartition_r(partition, 0))) {
*ec = VNOVOL;
return NULL;
}
+ /* ensure that any vp we pass to VPreAttachVolumeByVp_r
+ * is NOT in exclusive state.
+ */
+ retry:
vp = VLookupVolume_r(ec, volumeId, NULL);
+
if (*ec) {
return NULL;
}
+ if (vp && VIsExclusiveState(V_attachState(vp))) {
+ VCreateReservation_r(vp);
+ VWaitExclusiveState_r(vp);
+ VCancelReservation_r(vp);
+ vp = NULL;
+ goto retry; /* look up volume again */
+ }
+
+ /* vp == NULL or vp not exclusive both OK */
+
return VPreAttachVolumeByVp_r(ec, partp, vp, volumeId);
}
*
* @pre VOL_LOCK is held.
*
+ * @pre vp (if specified) must not be in exclusive state.
+ *
* @warning Returned volume object pointer does not have to
* equal the pointer passed in as argument vp. There
* are potential race conditions which can result in
VPreAttachVolumeByVp_r(Error * ec,
struct DiskPartition64 * partp,
Volume * vp,
- VolId vid)
+ VolumeId vid)
{
Volume *nvp = NULL;
*ec = 0;
+ /* don't proceed unless it's safe */
+ if (vp) {
+ opr_Assert(!VIsExclusiveState(V_attachState(vp)));
+ }
+
/* check to see if pre-attach already happened */
if (vp &&
(V_attachState(vp) != VOL_STATE_UNATTACHED) &&
* - volume is in an error state
* - volume is pre-attached
*/
- Log("VPreattachVolumeByVp_r: volume %u not in quiescent state\n", vid);
+ Log("VPreattachVolumeByVp_r: volume %" AFS_VOLID_FMT " not in quiescent state (state %u flags 0x%x)\n",
+ afs_printable_VolumeId_lu(vid), V_attachState(vp),
+ V_attachFlags(vp));
goto done;
} else if (vp) {
/* we're re-attaching a volume; clear out some old state */
VOL_UNLOCK;
/* allocate the volume structure */
- vp = nvp = (Volume *) malloc(sizeof(Volume));
- osi_Assert(vp != NULL);
- memset(vp, 0, sizeof(Volume));
+ vp = nvp = calloc(1, sizeof(Volume));
+ opr_Assert(vp != NULL);
queue_Init(&vp->vnode_list);
- CV_INIT(&V_attachCV(vp), "vp attach", CV_DEFAULT, 0);
+ queue_Init(&vp->rx_call_list);
+ opr_cv_init(&V_attachCV(vp));
}
/* link the volume with its associated vice partition */
VLRU_Init_Node_r(vp);
VChangeState_r(vp, VOL_STATE_PREATTACHED);
- if (LogLevel >= 5)
- Log("VPreAttachVolumeByVp_r: volume %u pre-attached\n", vp->hashid);
+ if (GetLogLevel() >= 5)
+ Log("VPreAttachVolumeByVp_r: volume %" AFS_VOLID_FMT " pre-attached\n", afs_printable_VolumeId_lu(vp->hashid));
done:
if (*ec)
struct DiskPartition64 *partp;
char path[64];
int isbusy = 0;
- VolId volumeId;
+ VolumeId volumeId;
+ int checkedOut;
#ifdef AFS_DEMAND_ATTACH_FS
VolumeStats stats_save;
Volume *svp = NULL;
}
if (VRequiresPartLock()) {
- osi_Assert(VInit == 3);
+ opr_Assert(VInit == 3);
VLockPartition_r(partition);
} else if (programType == fileServer) {
#ifdef AFS_DEMAND_ATTACH_FS
}
}
- osi_Assert(vp != NULL);
+ opr_Assert(vp != NULL);
/* handle pre-attach races
*
VOL_UNLOCK;
- strcat(path, "/");
+ strcat(path, OS_DIRSEP);
strcat(path, name);
if (!vp) {
vp = (Volume *) calloc(1, sizeof(Volume));
- osi_Assert(vp != NULL);
+ opr_Assert(vp != NULL);
vp->hashid = volumeId;
vp->device = partp->device;
vp->partition = partp;
queue_Init(&vp->vnode_list);
+ queue_Init(&vp->rx_call_list);
#ifdef AFS_DEMAND_ATTACH_FS
- CV_INIT(&V_attachCV(vp), "vp attach", CV_DEFAULT, 0);
+ opr_cv_init(&V_attachCV(vp));
#endif /* AFS_DEMAND_ATTACH_FS */
}
/* attach2 is entered without any locks, and returns
* with vol_glock_mutex held */
- vp = attach2(ec, volumeId, path, partp, vp, isbusy, mode);
+ vp = attach2(ec, volumeId, path, partp, vp, isbusy, mode, &checkedOut);
if (VCanUseFSSYNC() && vp) {
#ifdef AFS_DEMAND_ATTACH_FS
if (mode == V_PEEK) {
vp->needsPutBack = 0;
} else {
- vp->needsPutBack = 1;
+ vp->needsPutBack = VOL_PUTBACK;
}
#else /* !AFS_DEMAND_ATTACH_FS */
/* duplicate computation in fssync.c about whether the server
|| (!VolumeWriteable(vp) && (mode == V_CLONE || mode == V_DUMP)))
vp->needsPutBack = 0;
else
- vp->needsPutBack = 1;
+ vp->needsPutBack = VOL_PUTBACK;
#endif /* !AFS_DEMAND_ATTACH_FS */
}
- /* OK, there's a problem here, but one that I don't know how to
- * fix right now, and that I don't think should arise often.
- * Basically, we should only put back this volume to the server if
- * it was given to us by the server, but since we don't have a vp,
- * we can't run the VolumeWriteable function to find out as we do
- * above when computing vp->needsPutBack. So we send it back, but
- * there's a path in VAttachVolume on the server which may abort
- * if this volume doesn't have a header. Should be pretty rare
- * for all of that to happen, but if it does, probably the right
- * fix is for the server to allow the return of readonly volumes
- * that it doesn't think are really checked out. */
#ifdef FSSYNC_BUILD_CLIENT
- if (VCanUseFSSYNC() && vp == NULL &&
- mode != V_SECRETLY && mode != V_PEEK) {
+ /* Only give back the vol to the fileserver if we checked it out; attach2
+ * will set checkedOut only if we successfully checked it out from the
+ * fileserver. */
+ if (VCanUseFSSYNC() && vp == NULL && checkedOut) {
#ifdef AFS_DEMAND_ATTACH_FS
/* If we couldn't attach but we scheduled a salvage, we already
goto done;
}
}
- if (LogLevel)
- Log("VOnline: volume %u (%s) attached and online\n", V_id(vp),
+ if (GetLogLevel() != 0)
+ Log("VOnline: volume %" AFS_VOLID_FMT " (%s) attached and online\n", afs_printable_VolumeId_lu(V_id(vp)),
V_name(vp));
}
struct DiskPartition64 *partp;
char path[64];
int isbusy = 0;
- VolId volumeId;
+ VolumeId volumeId;
Volume * nvp = NULL;
VolumeStats stats_save;
+ int checkedOut;
*ec = 0;
/* volume utility should never call AttachByVp */
- osi_Assert(programType == fileServer);
+ opr_Assert(programType == fileServer);
volumeId = vp->hashid;
partp = vp->partition;
}
}
- osi_Assert(vp != NULL);
+ opr_Assert(vp != NULL);
VChangeState_r(vp, VOL_STATE_ATTACHING);
/* restore monotonically increasing stats */
VOL_UNLOCK;
- strcat(path, "/");
+ strcat(path, OS_DIRSEP);
strcat(path, name);
/* do volume attach
*
* NOTE: attach2 is entered without any locks, and returns
* with vol_glock_mutex held */
- vp = attach2(ec, volumeId, path, partp, vp, isbusy, mode);
+ vp = attach2(ec, volumeId, path, partp, vp, isbusy, mode, &checkedOut);
/*
* the event that an error was encountered, or
VUpdateVolume_r(ec, vp, 0);
if (*ec) {
- Log("VAttachVolume: Error updating volume %u\n", vp->hashid);
+ Log("VAttachVolume: Error updating volume %" AFS_VOLID_FMT "\n",
+ afs_printable_VolumeId_lu(vp->hashid));
VPutVolume_r(vp);
goto done;
}
#endif /* !AFS_DEMAND_ATTACH_FS */
VAddToVolumeUpdateList_r(ec, vp);
if (*ec) {
- Log("VAttachVolume: Error adding volume %u to update list\n", vp->hashid);
+ Log("VAttachVolume: Error adding volume %" AFS_VOLID_FMT " to update list\n",
+ afs_printable_VolumeId_lu(vp->hashid));
if (vp)
VPutVolume_r(vp);
goto done;
}
}
- if (LogLevel)
- Log("VOnline: volume %u (%s) attached and online\n", V_id(vp),
- V_name(vp));
+ if (GetLogLevel() != 0)
+ Log("VOnline: volume %" AFS_VOLID_FMT " (%s) attached and online\n",
+ afs_printable_VolumeId_lu(V_id(vp)), V_name(vp));
done:
if (reserve) {
VCancelReservation_r(nvp);
{
int code;
- osi_Assert(programType != fileServer || VIsExclusiveState(V_attachState(vp)));
- osi_Assert(!(V_attachFlags(vp) & VOL_LOCKED));
+ opr_Assert(programType != fileServer
+ || VIsExclusiveState(V_attachState(vp)));
+ opr_Assert(!(V_attachFlags(vp) & VOL_LOCKED));
code = VLockVolumeByIdNB(vp->hashid, vp->partition, locktype);
if (code == 0) {
static void
VUnlockVolume(Volume *vp)
{
- osi_Assert(programType != fileServer || VIsExclusiveState(V_attachState(vp)));
- osi_Assert((V_attachFlags(vp) & VOL_LOCKED));
+ opr_Assert(programType != fileServer
+ || VIsExclusiveState(V_attachState(vp)));
+ opr_Assert((V_attachFlags(vp) & VOL_LOCKED));
VUnlockVolumeById(vp->hashid, vp->partition);
* we don't try to lock the vol, or check it out from
* FSSYNC or anything like that; 0 otherwise, for 'normal'
* operation
+ * @param[out] acheckedOut If we successfully checked-out the volume from
+ * the fileserver (if we needed to), this is set
+ * to 1, otherwise it is untouched.
*
* @note As part of DAFS volume attachment, the volume header may be either
* read- or write-locked to ensure mutual exclusion of certain volume
*/
static void
attach_volume_header(Error *ec, Volume *vp, struct DiskPartition64 *partp,
- int mode, int peek)
+ int mode, int peek, int *acheckedOut)
{
struct VolumeDiskHeader diskHeader;
struct VolumeHeader header;
SYNC_response res;
memset(&res, 0, sizeof(res));
- if (FSYNC_VolOp(volid, VPartitionPath(partp), FSYNC_VOL_NEEDVOLUME, mode, &res)
+ if (FSYNC_VolOp(volid, partp->name, FSYNC_VOL_NEEDVOLUME, mode, &res)
!= SYNC_OK) {
if (res.hdr.reason == FSYNC_SALVAGE) {
}
goto done;
}
+ *acheckedOut = 1;
}
#endif
if (VCanUseFSSYNC() && (mode == V_PEEK || peek)) {
SYNC_response res;
res.payload.len = sizeof(VolumeDiskData);
- res.payload.buf = &vp->header->diskstuff;
+ res.payload.buf = &(V_disk(vp));
if (FSYNC_VolOp(vp->hashid,
partp->name,
#if defined(AFS_DEMAND_ATTACH_FS) && defined(FSSYNC_BUILD_CLIENT)
if (!peek && *ec == 0 && retry == 0 && VMustCheckoutVolume(mode)) {
- code = FSYNC_VerifyCheckout(volid, VPartitionPath(partp), FSYNC_VOL_NEEDVOLUME, mode);
+ code = FSYNC_VerifyCheckout(volid, partp->name, FSYNC_VOL_NEEDVOLUME, mode);
if (code == SYNC_DENIED) {
/* must retry checkout; fileserver no longer thinks we have
}
if (*ec) {
+ VOL_LOCK;
+ FreeVolumeHeader(vp);
+ VOL_UNLOCK;
return;
}
if (retry) {
#ifdef AFS_DEMAND_ATTACH_FS
static void
attach_check_vop(Error *ec, VolumeId volid, struct DiskPartition64 *partp,
- Volume *vp)
+ Volume *vp, int *acheckedOut)
{
*ec = 0;
/* attach header with peek=1 to avoid checking out the volume
* or locking it; we just want the header info, we're not
* messing with the volume itself at all */
- attach_volume_header(ec, vp, partp, V_PEEK, 1);
+ attach_volume_header(ec, vp, partp, V_PEEK, 1, acheckedOut);
if (*ec) {
return;
}
switch (vp->pending_vol_op->vol_op_state) {
case FSSYNC_VolOpPending:
/* this should never happen */
- osi_Assert(vp->pending_vol_op->vol_op_state != FSSYNC_VolOpPending);
+ opr_Assert(vp->pending_vol_op->vol_op_state
+ != FSSYNC_VolOpPending);
break;
case FSSYNC_VolOpRunningUnknown:
/* this should never happen; we resolved 'unknown' above */
- osi_Assert(vp->pending_vol_op->vol_op_state != FSSYNC_VolOpRunningUnknown);
+ opr_Assert(vp->pending_vol_op->vol_op_state
+ != FSSYNC_VolOpRunningUnknown);
break;
case FSSYNC_VolOpRunningOffline:
/* check to see if we should set the specialStatus flag */
if (VVolOpSetVBusy_r(vp, vp->pending_vol_op)) {
- vp->specialStatus = VBUSY;
+ /* don't overwrite specialStatus if it was already set to
+ * something else (e.g. VMOVED) */
+ if (!vp->specialStatus) {
+ vp->specialStatus = VBUSY;
+ }
}
break;
* @param[in] path full path to the volume header .vol file
* @param[in] partp disk partition object for the attaching partition
* @param[in] vp volume object; vp->hashid, vp->device, vp->partition,
- * vp->vnode_list, and V_attachCV (for DAFS) should already
- * be initialized
+ * vp->vnode_list, vp->rx_call_list, and V_attachCV (for
+ * DAFS) should already be initialized
* @param[in] isbusy 1 if vp->specialStatus should be set to VBUSY; that is,
* if there is a volume operation running for this volume
* that should set the volume to VBUSY during its run. 0
* otherwise. (see VVolOpSetVBusy_r)
* @param[in] mode attachment mode such as V_VOLUPD, V_DUMP, etc (see
* volume.h)
+ * @param[out] acheckedOut If we successfully checked-out the volume from
+ * the fileserver (if we needed to), this is set
+ * to 1, otherwise it is 0.
*
* @return pointer to the semi-attached volume pointer
* @retval NULL an error occurred (check value of *ec)
* @post VOL_LOCK held
*/
static Volume *
-attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp,
- Volume * vp, int isbusy, int mode)
+attach2(Error * ec, VolumeId volumeId, char *path, struct DiskPartition64 *partp,
+ Volume * vp, int isbusy, int mode, int *acheckedOut)
{
/* have we read in the header successfully? */
int read_header = 0;
vp->diskDataHandle = NULL;
vp->linkHandle = NULL;
+ *acheckedOut = 0;
+
#ifdef AFS_DEMAND_ATTACH_FS
- attach_check_vop(ec, volumeId, partp, vp);
+ attach_check_vop(ec, volumeId, partp, vp, acheckedOut);
if (!*ec) {
- attach_volume_header(ec, vp, partp, mode, 0);
+ attach_volume_header(ec, vp, partp, mode, 0, acheckedOut);
}
#else
- attach_volume_header(ec, vp, partp, mode, 0);
+ attach_volume_header(ec, vp, partp, mode, 0, acheckedOut);
#endif /* !AFS_DEMAND_ATTACH_FS */
if (*ec == VNOVOL) {
if (!*ec) {
read_header = 1;
- vp->specialStatus = (byte) (isbusy ? VBUSY : 0);
+ /* ensure that we don't override specialStatus if it was set to
+ * something else (e.g. VMOVED) */
+ if (isbusy && !vp->specialStatus) {
+ vp->specialStatus = VBUSY;
+ }
vp->shuttingDown = 0;
vp->goingOffline = 0;
vp->nUsers = 1;
if (!*ec) {
struct IndexFileHeader iHead;
-#if OPENAFS_VOL_STATS
/*
* We just read in the diskstuff part of the header. If the detailed
* volume stats area has not yet been initialized, we should bzero the
memset((V_stat_area(vp)), 0, VOL_STATS_BYTES);
V_stat_initialized(vp) = 1;
}
-#endif /* OPENAFS_VOL_STATS */
(void)ReadHeader(ec, vp->vnodeIndex[vSmall].handle,
(char *)&iHead, sizeof(iHead),
if (!VCanScheduleSalvage()) {
Log("VAttachVolume: Error attaching volume %s; volume needs salvage; error=%u\n", path, *ec);
}
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
- VOL_SALVAGE_NO_OFFLINE);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_NO_OFFLINE);
vp->nUsers = 0;
goto locked_error;
} else if (*ec) {
/* volume operation in progress */
- goto unlocked_error;
+ VOL_LOCK;
+ /* we have already transitioned the vp away from ATTACHING state, so we
+ * can go right to the end of attach2, and we do not need to transition
+ * to ERROR. */
+ goto error_notbroken;
}
#else /* AFS_DEMAND_ATTACH_FS */
if (*ec) {
if (!VCanScheduleSalvage()) {
Log("VAttachVolume: volume salvage flag is ON for %s; volume needs salvage\n", path);
}
- VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER |
- VOL_SALVAGE_NO_OFFLINE);
+ VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_NO_OFFLINE);
vp->nUsers = 0;
#else /* AFS_DEMAND_ATTACH_FS */
if (!VCanScheduleSalvage()) {
Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path);
}
- VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER |
- VOL_SALVAGE_NO_OFFLINE);
+ VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_NO_OFFLINE);
vp->nUsers = 0;
#else /* AFS_DEMAND_ATTACH_FS */
#if defined(AFS_DEMAND_ATTACH_FS)
/* schedule a salvage so the volume goes away on disk */
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
- VOL_SALVAGE_NO_OFFLINE);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_NO_OFFLINE);
VChangeState_r(vp, VOL_STATE_ERROR);
vp->nUsers = 0;
forcefree = 1;
VGetBitmap_r(ec, vp, i);
if (*ec) {
#ifdef AFS_DEMAND_ATTACH_FS
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
- VOL_SALVAGE_NO_OFFLINE);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_NO_OFFLINE);
vp->nUsers = 0;
#endif /* AFS_DEMAND_ATTACH_FS */
Log("VAttachVolume: error getting bitmap for volume (%s)\n",
"%lu; needs salvage\n", (int)*ec,
afs_printable_uint32_lu(V_id(vp)));
#ifdef AFS_DEMAND_ATTACH_FS
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
- VOL_SALVAGE_NO_OFFLINE);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_NO_OFFLINE);
vp->nUsers = 0;
#else /* !AFS_DEMAND_ATTACH_FS */
*ec = VSALVAGE;
V_inUse(vp) = fileServer;
V_offlineMessage(vp)[0] = '\0';
}
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* check if the volume is actually usable. only do this for DAFS; for
+ * non-DAFS, volumes that are not inService/blessed can still be
+ * attached, even if clients cannot access them. this is relevant
+ * because for non-DAFS, we try to attach the volume when e.g.
+ * volserver gives us back the vol when it's done with it, but
+ * volserver may give us back a volume that is not inService/blessed. */
+
if (!V_inUse(vp)) {
*ec = VNOVOL;
-#ifdef AFS_DEMAND_ATTACH_FS
/* Put the vol into PREATTACHED state, so if someone tries to
* access it again, we try to attach, see that we're not blessed,
* and give a VNOVOL error again. Putting it into UNATTACHED state
* would result in a VOFFLINE error instead. */
error_state = VOL_STATE_PREATTACHED;
-#endif /* AFS_DEMAND_ATTACH_FS */
/* mimic e.g. GetVolume errors */
if (!V_blessed(vp)) {
FreeVolumeHeader(vp);
} else if (!V_inService(vp)) {
Log("Volume %lu offline: not in service\n", afs_printable_uint32_lu(V_id(vp)));
+ /* the volume is offline and should be unattached */
+ *ec = VOFFLINE;
+ error_state = VOL_STATE_UNATTACHED;
FreeVolumeHeader(vp);
} else {
Log("Volume %lu offline: needs salvage\n", afs_printable_uint32_lu(V_id(vp)));
*ec = VSALVAGE;
-#ifdef AFS_DEMAND_ATTACH_FS
error_state = VOL_STATE_ERROR;
/* see if we can recover */
- VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
-#endif
+ VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_NO_OFFLINE);
}
-#ifdef AFS_DEMAND_ATTACH_FS
vp->nUsers = 0;
-#endif
goto locked_error;
}
+#endif /* AFS_DEMAND_ATTACH_FS */
} else {
#ifdef AFS_DEMAND_ATTACH_FS
- if ((mode != V_PEEK) && (mode != V_SECRETLY))
+ if ((mode != V_PEEK) && (mode != V_SECRETLY) && (mode != V_READONLY))
V_inUse(vp) = programType;
#endif /* AFS_DEMAND_ATTACH_FS */
V_checkoutMode(vp) = mode;
}
- AddVolumeToHashTable(vp, V_id(vp));
+ AddVolumeToHashTable(vp, vp->hashid);
#ifdef AFS_DEMAND_ATTACH_FS
if (VCanUnlockAttached() && (V_attachFlags(vp) & VOL_LOCKED)) {
VUnlockVolume(vp);
locked_error:
#ifdef AFS_DEMAND_ATTACH_FS
if (!VIsErrorState(V_attachState(vp))) {
+ if (programType != fileServer && *ec == VNOVOL) {
+ /* do not log anything in this case; it is common for
+ * non-fileserver programs to fail here with VNOVOL, since that
+ * is what happens when they simply try to use a volume, but that
+ * volume doesn't exist. */
+
+ } else if (VIsErrorState(error_state)) {
+ Log("attach2: forcing vol %" AFS_VOLID_FMT " to error state (state %u flags 0x%x ec %d)\n",
+ afs_printable_VolumeId_lu(vp->hashid), V_attachState(vp),
+ V_attachFlags(vp), *ec);
+ }
VChangeState_r(vp, error_state);
}
#endif /* AFS_DEMAND_ATTACH_FS */
}
#ifdef AFS_DEMAND_ATTACH_FS
- VCheckSalvage(vp);
+ error_notbroken:
+ if (VCheckSalvage(vp) == VCHECK_SALVAGE_FAIL) {
+ /* The salvage could not be scheduled with the salvage server
+ * due to a hard error. Reset the error code to prevent retry loops by
+ * callers. */
+ if (*ec == VSALVAGING) {
+ *ec = VSALVAGE;
+ }
+ }
if (forcefree) {
FreeVolume(vp);
} else {
Error error;
vp = VGetVolume_r(&error, volumeId);
if (vp) {
- osi_Assert(V_inUse(vp) == 0);
+ opr_Assert(V_inUse(vp) == 0);
VDetachVolume_r(ec, vp);
}
return NULL;
}
#endif /* AFS_DEMAND_ATTACH_FS */
+/**** volume timeout-related stuff ****/
+
+#ifdef AFS_PTHREAD_ENV
+
+static struct timespec *shutdown_timeout;
+static pthread_once_t shutdown_timeout_once = PTHREAD_ONCE_INIT;
+
+/**
+ * Check whether an absolute deadline has passed.
+ *
+ * @param[in] ts  absolute deadline; a deadline with tv_sec == 0 is the
+ *                special "immediate" timeout and always counts as expired
+ *
+ * @return whether the deadline has passed
+ *  @retval 1 'ts' is in the past (or ts->tv_sec == 0)
+ *  @retval 0 'ts' is still in the future, or the current time could not be
+ *            obtained (we deliberately err on the side of waiting longer)
+ *
+ * @internal
+ */
+static_inline int
+VTimedOut(const struct timespec *ts)
+{
+ struct timeval tv;
+ int code;
+
+ if (ts->tv_sec == 0) {
+ /* short-circuit; this will have always timed out */
+ return 1;
+ }
+
+ code = gettimeofday(&tv, NULL);
+ if (code) {
+ Log("Error %d from gettimeofday, assuming we have not timed out\n", errno);
+ /* assume no timeout; failure mode is we just wait longer than normal
+ * instead of returning errors when we shouldn't */
+ return 0;
+ }
+
+ if (tv.tv_sec < ts->tv_sec ||
+ (tv.tv_sec == ts->tv_sec && tv.tv_usec*1000 < ts->tv_nsec)) {
+
+ /* current time is strictly before the deadline */
+ return 0;
+ }
+
+ return 1;
+}
+
+/**
+ * Calculate an absolute timeout.
+ *
+ * @param[out] ts A timeout that is "timeout" seconds from now, if we return
+ * NULL, the memory is not touched
+ * @param[in] timeout How long the timeout should be from now
+ *
+ * @return timeout to use
+ * @retval NULL no timeout; wait forever
+ * @retval non-NULL the given value for "ts"
+ *
+ * @internal
+ */
+static struct timespec *
+VCalcTimeout(struct timespec *ts, afs_int32 timeout)
+{
+ struct timeval now;
+ int code;
+
+ /* a negative timeout means "no timeout": wait forever */
+ if (timeout < 0) {
+ return NULL;
+ }
+
+ /* a zero timeout yields the special all-zeroes deadline, which
+ * VTimedOut treats as already expired */
+ if (timeout == 0) {
+ ts->tv_sec = ts->tv_nsec = 0;
+ return ts;
+ }
+
+ code = gettimeofday(&now, NULL);
+ if (code) {
+ Log("Error %d from gettimeofday, falling back to 'forever' timeout\n", errno);
+ return NULL;
+ }
+
+ ts->tv_sec = now.tv_sec + timeout;
+ ts->tv_nsec = now.tv_usec * 1000;
+
+ return ts;
+}
+
+/**
+ * Initialize the shutdown_timeout global.
+ *
+ * Runs at most once, via pthread_once(&shutdown_timeout_once, ...) from
+ * VOfflineTimeout, so all shutdown waiters share one absolute deadline.
+ * A NULL result from VCalcTimeout means "wait forever", in which case the
+ * allocated timespec is released again.
+ */
+static void
+VShutdownTimeoutInit(void)
+{
+ struct timespec *ts;
+
+ /* NOTE(review): malloc result is not checked; if it returns NULL and
+ * vol_opts.offline_shutdown_timeout == 0, VCalcTimeout writes through
+ * the NULL pointer. Consider guarding — TODO confirm intended policy. */
+ ts = malloc(sizeof(*ts));
+
+ shutdown_timeout = VCalcTimeout(ts, vol_opts.offline_shutdown_timeout);
+
+ if (!shutdown_timeout) {
+ free(ts);
+ }
+}
+
+/**
+ * Figure out the timeout that should be used for waiting for offline volumes.
+ *
+ * @param[out] ats Storage space for a local timeout value if needed
+ *
+ * @return The timeout value that should be used
+ * @retval NULL No timeout; wait forever for offlining volumes
+ * @retval non-NULL A pointer to the absolute time that should be used as
+ * the deadline for waiting for offlining volumes.
+ *
+ * @note If we return non-NULL, the pointer we return may or may not be the
+ * same as "ats"
+ */
+static const struct timespec *
+VOfflineTimeout(struct timespec *ats)
+{
+ if (vol_shutting_down) {
+ /* during shutdown, every waiter shares the single deadline computed
+ * once by VShutdownTimeoutInit (from offline_shutdown_timeout) */
+ opr_Verify(pthread_once(&shutdown_timeout_once,
+ VShutdownTimeoutInit) == 0);
+ return shutdown_timeout;
+ } else {
+ /* normal operation: a fresh deadline from offline_timeout */
+ return VCalcTimeout(ats, vol_opts.offline_timeout);
+ }
+}
+
+#else /* AFS_PTHREAD_ENV */
+
+/* Waiting a certain amount of time for offlining volumes is not supported
+ * for LWP due to a lack of primitives. So, we never time out */
+# define VTimedOut(x) (0)
+# define VOfflineTimeout(x) (NULL)
+
+#endif /* !AFS_PTHREAD_ENV */
+
#if 0
static int
VHold(Volume * vp)
}
#endif
+/**
+ * Check whether a volume is waiting to go offline, and with what error.
+ *
+ * @param[in] vp the volume to check
+ *
+ * @return 0 if the volume is not going offline; otherwise the error code
+ *         callers should see: vp->specialStatus if set, VNOVOL if the
+ *         volume is out of service or unblessed, VOFFLINE otherwise
+ *
+ * @pre VOL_LOCK held (VIsGoingOffline is the lock-taking wrapper)
+ *
+ * @internal
+ */
+static afs_int32
+VIsGoingOffline_r(struct Volume *vp)
+{
+ afs_int32 code = 0;
+
+ if (vp->goingOffline) {
+ if (vp->specialStatus) {
+ code = vp->specialStatus;
+ } else if (V_inService(vp) == 0 || V_blessed(vp) == 0) {
+ code = VNOVOL;
+ } else {
+ code = VOFFLINE;
+ }
+ }
+
+ return code;
+}
+
+/**
+ * Tell the caller if a volume is waiting to go offline.
+ *
+ * @param[in] vp The volume we want to know about
+ *
+ * @return volume status
+ * @retval 0 volume is not waiting to go offline, go ahead and use it
+ * @retval nonzero volume is waiting to offline, and give the returned code
+ * as an error to anyone accessing the volume
+ *
+ * @pre VOL_LOCK is NOT held
+ * @pre caller holds a heavyweight reference on vp
+ */
+afs_int32
+VIsGoingOffline(struct Volume *vp)
+{
+ afs_int32 code;
+
+ /* locking wrapper around VIsGoingOffline_r */
+ VOL_LOCK;
+ code = VIsGoingOffline_r(vp);
+ VOL_UNLOCK;
+
+ return code;
+}
+
+/**
+ * Register an RX call with a volume.
+ *
+ * @param[inout] ec Error code; if unset when passed in, may be set if
+ * the volume starts going offline
+ * @param[out] client_ec @see GetVolume
+ * @param[in] vp Volume struct
+ * @param[in] cbv VCallByVol struct containing the RX call to register
+ *
+ * @pre VOL_LOCK held
+ * @pre caller holds heavy ref on vp
+ *
+ * @internal
+ */
+static void
+VRegisterCall_r(Error *ec, Error *client_ec, Volume *vp, struct VCallByVol *cbv)
+{
+ if (vp && cbv) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (!*ec) {
+ /* just in case the volume started going offline after we got the
+ * reference to it... otherwise, if the volume started going
+ * offline right at the end of GetVolume(), we might race with the
+ * RX call scanner, and return success and add our cbv to the
+ * rx_call_list _after_ the scanner has scanned the list. */
+ *ec = VIsGoingOffline_r(vp);
+ if (client_ec) {
+ *client_ec = *ec;
+ }
+ }
+
+ /* never modify rx_call_list while VScanCalls_r is walking it (it
+ * scans with VOL_LOCK dropped, flagged by SCANNING_RXCALLS); wait
+ * for any in-progress scan to finish first */
+ while (V_attachState(vp) == VOL_STATE_SCANNING_RXCALLS) {
+ VWaitStateChange_r(vp);
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ queue_Prepend(&vp->rx_call_list, cbv);
+ }
+}
+
+/**
+ * Deregister an RX call with a volume.
+ *
+ * @param[in] vp Volume struct
+ * @param[in] cbv VCallByVol struct containing the RX call to deregister
+ *
+ * @pre VOL_LOCK held
+ * @pre caller holds heavy ref on vp
+ *
+ * @internal
+ */
+static void
+VDeregisterCall_r(Volume *vp, struct VCallByVol *cbv)
+{
+ if (cbv && queue_IsOnQueue(cbv)) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* wait out any in-progress RX-call scan (VScanCalls_r walks the
+ * list with VOL_LOCK dropped) before unlinking from the list */
+ while (V_attachState(vp) == VOL_STATE_SCANNING_RXCALLS) {
+ VWaitStateChange_r(vp);
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ queue_Remove(cbv);
+ }
+}
/***************************************************/
/* get and put volume routines */
void
VPutVolume_r(Volume * vp)
{
- osi_Assert(--vp->nUsers >= 0);
+ opr_Verify(--vp->nUsers >= 0);
if (vp->nUsers == 0) {
VCheckOffline(vp);
ReleaseVolumeHeader(vp->header);
VOL_UNLOCK;
}
+/**
+ * Puts a volume reference obtained with VGetVolumeWithCall.
+ *
+ * @param[in] vp Volume struct
+ * @param[in] cbv VCallByVol struct given to VGetVolumeWithCall, or NULL if none
+ *
+ * @pre VOL_LOCK is NOT held
+ */
+void
+VPutVolumeWithCall(Volume *vp, struct VCallByVol *cbv)
+{
+ VOL_LOCK;
+ /* unlink cbv from vp->rx_call_list before the reference goes away */
+ VDeregisterCall_r(vp, cbv);
+ VPutVolume_r(vp);
+ VOL_UNLOCK;
+}
/* Get a pointer to an attached volume. The pointer is returned regardless
of whether or not the volume is in service or on/off line. An error
code, however, is returned with an indication of the volume's status */
Volume *
-VGetVolume(Error * ec, Error * client_ec, VolId volumeId)
+VGetVolume(Error * ec, Error * client_ec, VolumeId volumeId)
{
Volume *retVal;
VOL_LOCK;
return retVal;
}
-/* same as VGetVolume, but if a volume is waiting to go offline, we return
- * that it is actually offline, instead of waiting for it to go offline */
+/**
+ * Get a volume reference associated with an RX call.
+ *
+ * @param[out] ec @see GetVolume
+ * @param[out] client_ec @see GetVolume
+ * @param[in] volumeId @see GetVolume
+ * @param[in] ts How long to wait for going-offline volumes (absolute time).
+ * If NULL, wait forever. If ts->tv_sec == 0, return immediately
+ * with an error if the volume is going offline.
+ * @param[in] cbv Contains an RX call to be associated with this volume
+ * reference. This call may be interrupted if the volume is
+ * requested to go offline while we hold a ref on it. Give NULL
+ * to not associate an RX call with this reference.
+ *
+ * @return @see GetVolume
+ *
+ * @note for LWP builds, ts must be NULL
+ *
+ * @note A reference obtained with this function MUST be put back with
+ * VPutVolumeWithCall
+ */
Volume *
-VGetVolumeNoWait(Error * ec, Error * client_ec, VolId volumeId)
+VGetVolumeWithCall(Error * ec, Error * client_ec, VolumeId volumeId,
+ const struct timespec *ts, struct VCallByVol *cbv)
{
Volume *retVal;
VOL_LOCK;
- retVal = GetVolume(ec, client_ec, volumeId, NULL, 1);
+ retVal = GetVolume(ec, client_ec, volumeId, NULL, ts);
+ /* register in the same VOL_LOCK critical section as the lookup, so we
+ * cannot miss a going-offline transition in between; see VRegisterCall_r */
+ VRegisterCall_r(ec, client_ec, retVal, cbv);
VOL_UNLOCK;
return retVal;
}
+/* Like VGetVolume, but does no locking itself (presumably called with
+ * VOL_LOCK already held, per the _r convention — see VGetVolume); the NULL
+ * timeout means we wait indefinitely for a going-offline volume. */
Volume *
-VGetVolume_r(Error * ec, VolId volumeId)
+VGetVolume_r(Error * ec, VolumeId volumeId)
{
- return GetVolume(ec, NULL, volumeId, NULL, 0);
+ return GetVolume(ec, NULL, volumeId, NULL, NULL);
}
/* try to get a volume we've previously looked up */
+/* (passes vp to GetVolume as a hash-lookup hint; NULL timeout means we wait
+ * forever if the volume is going offline) */
Volume *
VGetVolumeByVp_r(Error * ec, Volume * vp)
{
- return GetVolume(ec, NULL, vp->hashid, vp, 0);
+ return GetVolume(ec, NULL, vp->hashid, vp, NULL);
}
/**
* @param[out] client_ec wire error code to be given to clients
* @param[in] volumeId ID of the volume we want
* @param[in] hint optional hint for hash lookups, or NULL
- * @param[in] nowait 0 to wait for a 'goingOffline' volume to go offline
- * before returning, 1 to return immediately
+ * @param[in] timeout absolute deadline for waiting for the volume to go
+ * offline, if it is going offline. NULL to wait forever.
*
* @return a volume handle for the specified volume
* @retval NULL an error occurred, or the volume is in such a state that
* we cannot load a header or return any volume struct
*
* @note for DAFS, caller must NOT hold a ref count on 'hint'
+ *
+ * @note 'timeout' is only checked if the volume is actually going offline; so
+ * if you pass timeout->tv_sec = 0, this will exhibit typical
+ * nonblocking behavior.
+ *
+ * @note for LWP builds, 'timeout' must be NULL
*/
static Volume *
-GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int nowait)
+GetVolume(Error * ec, Error * client_ec, VolumeId volumeId, Volume * hint,
+ const struct timespec *timeout)
{
Volume *vp = hint;
/* pull this profiling/debugging code out of regular builds */
* - VOL_STATE_SHUTTING_DOWN
*/
if ((V_attachState(vp) == VOL_STATE_ERROR) ||
- (V_attachState(vp) == VOL_STATE_SHUTTING_DOWN) ||
- (V_attachState(vp) == VOL_STATE_GOING_OFFLINE)) {
+ (V_attachState(vp) == VOL_STATE_SHUTTING_DOWN)) {
*ec = VNOVOL;
vp = NULL;
break;
}
/*
- * short circuit with VOFFLINE for VOL_STATE_UNATTACHED and
+ * short circuit with VOFFLINE for VOL_STATE_UNATTACHED/GOING_OFFLINE and
* VNOVOL for VOL_STATE_DELETED
*/
if ((V_attachState(vp) == VOL_STATE_UNATTACHED) ||
+ (V_attachState(vp) == VOL_STATE_GOING_OFFLINE) ||
(V_attachState(vp) == VOL_STATE_DELETED)) {
if (vp->specialStatus) {
*ec = vp->specialStatus;
}
if (V_attachState(vp) == VOL_STATE_PREATTACHED) {
+ if (vp->specialStatus) {
+ *ec = vp->specialStatus;
+ vp = NULL;
+ break;
+ }
avp = VAttachVolumeByVp_r(ec, vp, 0);
if (avp) {
if (vp != avp) {
case VSALVAGING:
break;
case VOFFLINE:
- if (!vp->pending_vol_op) {
- endloop = 1;
+ endloop = 1;
+ if (vp->specialStatus) {
+ *ec = vp->specialStatus;
}
break;
+
default:
- *ec = VNOVOL;
+ if (vp->specialStatus) {
+ *ec = vp->specialStatus;
+ } else {
+ *ec = VNOVOL;
+ }
endloop = 1;
}
if (endloop) {
vp = NULL;
break;
}
-#endif
-#ifdef AFS_DEMAND_ATTACH_FS
+ if (VIsErrorState(V_attachState(vp))) {
+ /* make sure we don't take a vp in VOL_STATE_ERROR state and use
+ * it, or transition it out of that state */
+ if (!*ec) {
+ *ec = VNOVOL;
+ }
+ vp = NULL;
+ break;
+ }
+
/*
- * this test MUST happen after VAttachVolymeByVp, so vol_op_state is
- * not VolOpRunningUnknown (attach2 would have converted it to Online
- * or Offline)
+ * this test MUST happen after VAttachVolumeByVp, so we have no
+ * conflicting vol op. (attach2 would have errored out if we had one;
+ * specifically attach_check_vop must have detected a conflicting vop)
*/
+ opr_Assert(!vp->pending_vol_op || vp->pending_vol_op->vol_op_state == FSSYNC_VolOpRunningOnline);
- /* only valid before/during demand attachment */
- osi_Assert(!vp->pending_vol_op || vp->pending_vol_op->vol_op_state != FSSYNC_VolOpRunningUnknown);
-
- /* deny getvolume due to running mutually exclusive vol op */
- if (vp->pending_vol_op && vp->pending_vol_op->vol_op_state==FSSYNC_VolOpRunningOffline) {
- /*
- * volume cannot remain online during this volume operation.
- * notify client.
- */
- if (vp->specialStatus) {
- /*
- * special status codes outrank normal VOFFLINE code
- */
- *ec = vp->specialStatus;
- if (client_ec) {
- *client_ec = vp->specialStatus;
- }
- } else {
- if (client_ec) {
- /* see CheckVnode() in afsfileprocs.c for an explanation
- * of this error code logic */
- afs_uint32 now = FT_ApproxTime();
- if ((vp->stats.last_vol_op + (10 * 60)) >= now) {
- *client_ec = VBUSY;
- } else {
- *client_ec = VRESTARTING;
- }
- }
- *ec = VOFFLINE;
- }
- VChangeState_r(vp, VOL_STATE_UNATTACHED);
- FreeVolumeHeader(vp);
- vp = NULL;
- break;
- }
#endif /* AFS_DEMAND_ATTACH_FS */
LoadVolumeHeader(ec, vp);
VGET_CTR_INC(V6);
/* Only log the error if it was a totally unexpected error. Simply
* a missing inode is likely to be caused by the volume being deleted */
- if (errno != ENXIO || LogLevel)
- Log("Volume %u: couldn't reread volume header\n",
- vp->hashid);
+ if (errno != ENXIO || GetLogLevel() != 0)
+ Log("Volume %" AFS_VOLID_FMT ": couldn't reread volume header\n",
+ afs_printable_VolumeId_lu(vp->hashid));
#ifdef AFS_DEMAND_ATTACH_FS
if (VCanScheduleSalvage()) {
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0 /*flags*/);
} else {
FreeVolume(vp);
vp = NULL;
if (programType == fileServer) {
VGET_CTR_INC(V9);
- if (vp->goingOffline && !nowait) {
- VGET_CTR_INC(V10);
+ if (vp->goingOffline) {
+ if (timeout && VTimedOut(timeout)) {
+ /* we've timed out; don't wait for the vol */
+ } else {
+ VGET_CTR_INC(V10);
#ifdef AFS_DEMAND_ATTACH_FS
- /* wait for the volume to go offline */
- if (V_attachState(vp) == VOL_STATE_GOING_OFFLINE) {
- VWaitStateChange_r(vp);
- }
+ /* wait for the volume to go offline */
+ if (V_attachState(vp) == VOL_STATE_GOING_OFFLINE) {
+ VTimedWaitStateChange_r(vp, timeout, NULL);
+ }
#elif defined(AFS_PTHREAD_ENV)
- VOL_CV_WAIT(&vol_put_volume_cond);
+ VOL_CV_TIMEDWAIT(&vol_put_volume_cond, timeout, NULL);
#else /* AFS_PTHREAD_ENV */
- LWP_WaitProcess(VPutVolume);
+ /* LWP has no timed wait, so the caller better not be
+ * expecting one */
+ opr_Assert(!timeout);
+ LWP_WaitProcess(VPutVolume);
#endif /* AFS_PTHREAD_ENV */
- continue;
+ continue;
+ }
}
if (vp->specialStatus) {
VGET_CTR_INC(V11);
#endif /* AFS_DEMAND_ATTACH_FS */
not_inited:
- osi_Assert(vp || *ec);
+ opr_Assert(vp || *ec);
return vp;
}
{
Error error;
- osi_Assert(vp->nUsers > 0);
- osi_Assert(programType == fileServer);
+ opr_Assert(vp->nUsers > 0);
+ opr_Assert(programType == fileServer);
VCreateReservation_r(vp);
VWaitExclusiveState_r(vp);
void
VTakeOffline_r(Volume * vp)
{
- osi_Assert(vp->nUsers > 0);
- osi_Assert(programType == fileServer);
+ opr_Assert(vp->nUsers > 0);
+ opr_Assert(programType == fileServer);
vp->goingOffline = 1;
V_needsSalvaged(vp) = 1;
strcpy(V_offlineMessage(vp),
"Forced offline due to internal error: volume needs to be salvaged");
- Log("Volume %u forced offline: it needs salvaging!\n", V_id(vp));
+ Log("Volume %" AFS_VOLID_FMT " forced offline: it needs salvaging!\n", afs_printable_VolumeId_lu(V_id(vp)));
V_inUse(vp) = 0;
vp->goingOffline = 0;
}
#ifdef AFS_DEMAND_ATTACH_FS
- VRequestSalvage_r(&error, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ VRequestSalvage_r(&error, vp, SALVSYNC_ERROR, 0 /*flags*/);
#endif /* AFS_DEMAND_ATTACH_FS */
#ifdef AFS_PTHREAD_ENV
- CV_BROADCAST(&vol_put_volume_cond);
+ opr_cv_broadcast(&vol_put_volume_cond);
#else /* AFS_PTHREAD_ENV */
LWP_NoYieldSignal(VPutVolume);
#endif /* AFS_PTHREAD_ENV */
VOL_UNLOCK;
}
+/**
+ * Iterate over the RX calls associated with a volume, and interrupt them.
+ *
+ * @param[in] vp The volume whose RX calls we want to scan
+ *
+ * @pre VOL_LOCK held
+ */
+static void
+VScanCalls_r(struct Volume *vp)
+{
+ struct VCallByVol *cbv, *ncbv;
+ afs_int32 err;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VolState state_save;
+#endif
+
+ if (queue_IsEmpty(&vp->rx_call_list))
+ return; /* no calls to interrupt */
+ if (!vol_opts.interrupt_rxcall)
+ return; /* we have no function with which to interrupt calls */
+ err = VIsGoingOffline_r(vp);
+ if (!err)
+ return; /* we're not going offline anymore */
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* flag the scan via SCANNING_RXCALLS and drop VOL_LOCK for its duration;
+ * list mutators (VRegisterCall_r/VDeregisterCall_r) wait on this state,
+ * and the interrupt callback below is invoked without the lock held */
+ VWaitExclusiveState_r(vp);
+ state_save = VChangeState_r(vp, VOL_STATE_SCANNING_RXCALLS);
+ VOL_UNLOCK;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ for(queue_Scan(&vp->rx_call_list, cbv, ncbv, VCallByVol)) {
+ if (GetLogLevel() != 0) {
+ struct rx_peer *peer;
+ char hoststr[16];
+ peer = rx_PeerOf(rx_ConnectionOf(cbv->call));
+
+ Log("Offlining volume %" AFS_VOLID_FMT " while client %s:%u is trying to read "
+ "from it; kicking client off with error %ld\n",
+ afs_printable_VolumeId_lu(vp->hashid),
+ afs_inet_ntoa_r(rx_HostOf(peer), hoststr),
+ (unsigned) ntohs(rx_PortOf(peer)),
+ (long) err);
+ }
+ (*vol_opts.interrupt_rxcall) (cbv->call, err);
+ }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* reacquire the lock and restore whatever state we displaced */
+ VOL_LOCK;
+ VChangeState_r(vp, state_save);
+#endif /* AFS_DEMAND_ATTACH_FS */
+}
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/**
+ * Wait for a vp to go offline.
+ *
+ * @param[out] ec 1 if a salvage on the volume has been requested and
+ * salvok == 0, 0 otherwise
+ * @param[in] vp The volume to wait for
+ * @param[in] salvok If 0, we return immediately with *ec = 1 if the volume
+ * has been requested to salvage. Otherwise we keep waiting
+ * until the volume has gone offline.
+ *
+ * @pre VOL_LOCK held
+ * @pre caller holds a lightweight ref on vp
+ *
+ * @note DAFS only
+ */
+static void
+VWaitForOfflineByVp_r(Error *ec, struct Volume *vp, int salvok)
+{
+ struct timespec timeout_ts;
+ const struct timespec *ts;
+ int timedout = 0;
+
+ /* NULL ts means "no deadline": wait forever */
+ ts = VOfflineTimeout(&timeout_ts);
+
+ *ec = 0;
+
+ while (!VIsOfflineState(V_attachState(vp)) && !timedout) {
+ if (!salvok && vp->salvage.requested) {
+ *ec = 1;
+ return;
+ }
+ VTimedWaitStateChange_r(vp, ts, &timedout);
+ }
+ if (!timedout) {
+ /* we didn't time out, so the volume must be offline, so we're done */
+ return;
+ }
+
+ /* If we got here, we timed out waiting for the volume to go offline.
+ * Kick off the accessing RX calls and wait again */
+
+ VScanCalls_r(vp);
+
+ /* second wait has no deadline; the interfering calls have been kicked */
+ while (!VIsOfflineState(V_attachState(vp))) {
+ if (!salvok && vp->salvage.requested) {
+ *ec = 1;
+ return;
+ }
+
+ VWaitStateChange_r(vp);
+ }
+}
+
+#else /* AFS_DEMAND_ATTACH_FS */
+
+/**
+ * Wait for a volume to go offline.
+ *
+ * @param[out] ec error code from GetVolume on the lookup of 'volid'
+ * @param[in] volid ID of the volume to wait for
+ *
+ * @pre VOL_LOCK held
+ *
+ * @note non-DAFS only (for DAFS, use @see VWaitForOfflineByVp_r)
+ */
+static void
+VWaitForOffline_r(Error *ec, VolumeId volid)
+{
+ struct Volume *vp;
+ const struct timespec *ts;
+#ifdef AFS_PTHREAD_ENV
+ struct timespec timeout_ts;
+#endif
+
+ ts = VOfflineTimeout(&timeout_ts);
+
+ /* GetVolume itself waits (up to 'ts') for a going-offline volume */
+ vp = GetVolume(ec, NULL, volid, NULL, ts);
+ if (!vp) {
+ /* error occurred so bad that we can't even get a vp; we have no
+ * information on the vol so we don't know whether to wait, so just
+ * return */
+ return;
+ }
+ if (!VIsGoingOffline_r(vp)) {
+ /* volume is no longer going offline, so we're done */
+ VPutVolume_r(vp);
+ return;
+ }
+
+ /* If we got here, we timed out waiting for the volume to go offline.
+ * Kick off the accessing RX calls and wait again */
+
+ VScanCalls_r(vp);
+ VPutVolume_r(vp);
+ vp = NULL;
+
+ /* this second lookup waits with no deadline (NULL timeout) */
+ vp = VGetVolume_r(ec, volid);
+ if (vp) {
+ /* In case it was reattached... */
+ VPutVolume_r(vp);
+ }
+}
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
/* The opposite of VAttachVolume. The volume header is written to disk, with
the inUse bit turned off. A copy of the header is maintained in memory,
however (which is why this is VOffline, not VDetach).
void
VOffline_r(Volume * vp, char *message)
{
-#ifndef AFS_DEMAND_ATTACH_FS
Error error;
+#ifndef AFS_DEMAND_ATTACH_FS
VolumeId vid = V_id(vp);
#endif
- osi_Assert(programType != volumeUtility && programType != volumeServer);
+ opr_Assert(programType != volumeUtility && programType != volumeServer);
if (!V_inUse(vp)) {
VPutVolume_r(vp);
return;
VChangeState_r(vp, VOL_STATE_GOING_OFFLINE);
VCreateReservation_r(vp);
VPutVolume_r(vp);
-
- /* wait for the volume to go offline */
- if (V_attachState(vp) == VOL_STATE_GOING_OFFLINE) {
- VWaitStateChange_r(vp);
- }
+ VWaitForOfflineByVp_r(&error, vp, 1);
VCancelReservation_r(vp);
#else /* AFS_DEMAND_ATTACH_FS */
VPutVolume_r(vp);
- vp = VGetVolume_r(&error, vid); /* Wait for it to go offline */
- if (vp) /* In case it was reattached... */
- VPutVolume_r(vp);
+ VWaitForOffline_r(&error, vid);
#endif /* AFS_DEMAND_ATTACH_FS */
}
void
VOfflineForVolOp_r(Error *ec, Volume *vp, char *message)
{
- osi_Assert(vp->pending_vol_op);
+ int salvok = 1;
+ opr_Assert(vp->pending_vol_op);
if (!V_inUse(vp)) {
VPutVolume_r(vp);
*ec = 1;
VCreateReservation_r(vp);
VPutVolume_r(vp);
- /* Wait for the volume to go offline */
- while (!VIsOfflineState(V_attachState(vp))) {
+ if (vp->pending_vol_op->com.programType != salvageServer) {
/* do not give corrupted volumes to the volserver */
- if (vp->salvage.requested && vp->pending_vol_op->com.programType != salvageServer) {
- *ec = 1;
- goto error;
- }
- VWaitStateChange_r(vp);
+ salvok = 0;
}
+
*ec = 0;
- error:
+ VWaitForOfflineByVp_r(ec, vp, salvok);
+
VCancelReservation_r(vp);
}
#endif /* AFS_DEMAND_ATTACH_FS */
if (VCanUseFSSYNC()) {
notifyServer = vp->needsPutBack;
if (V_destroyMe(vp) == DESTROY_ME)
- useDone = FSYNC_VOL_DONE;
-#ifdef AFS_DEMAND_ATTACH_FS
+ useDone = FSYNC_VOL_LEAVE_OFF;
+# ifdef AFS_DEMAND_ATTACH_FS
else if (!V_blessed(vp) || !V_inService(vp))
useDone = FSYNC_VOL_LEAVE_OFF;
-#endif
+# endif
+ }
+# ifdef AFS_DEMAND_ATTACH_FS
+ if (V_needsSalvaged(vp)) {
+ notifyServer = 0;
+ VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, 0);
}
+# endif
tpartp = vp->partition;
volume = V_id(vp);
#endif /* FSSYNC_BUILD_CLIENT */
*/
#ifdef FSSYNC_BUILD_CLIENT
if (VCanUseFSSYNC() && notifyServer) {
+ if (notifyServer == VOL_PUTBACK_DELETE) {
+ /* Only send FSYNC_VOL_DONE if the volume was actually deleted.
+ * volserver code will set needsPutBack to VOL_PUTBACK_DELETE
+ * to signify a deleted volume. */
+ useDone = FSYNC_VOL_DONE;
+ }
/*
* Note: The server is not notified in the case of a bogus volume
* explicitly to make it possible to create a volume, do a partial
VolState state_save;
state_save = VChangeState_r(vp, VOL_STATE_OFFLINING);
+
+ VOL_UNLOCK;
#endif
- /* demand attach fs
- *
- * XXX need to investigate whether we can perform
- * DFlushVolume outside of vol_glock_mutex...
- *
- * VCloseVnodeFiles_r drops the glock internally */
DFlushVolume(vp->hashid);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+#endif
+
+ /* DAFS: VCloseVnodeFiles_r drops the glock internally */
VCloseVnodeFiles_r(vp);
#ifdef AFS_DEMAND_ATTACH_FS
IH_CONDSYNC(vp->vnodeIndex[vLarge].handle);
IH_CONDSYNC(vp->vnodeIndex[vSmall].handle);
IH_CONDSYNC(vp->diskDataHandle);
-#ifdef AFS_NT40_ENV
+#ifdef AFS_NAMEI_ENV
IH_CONDSYNC(vp->linkHandle);
-#endif /* AFS_NT40_ENV */
+#endif /* AFS_NAMEI_ENV */
}
IH_REALLYCLOSE(vp->vnodeIndex[vLarge].handle);
VolState state_save;
state_save = VChangeState_r(vp, VOL_STATE_DETACHING);
+
+ VOL_UNLOCK;
#endif
- /* XXX need to investigate whether we can perform
- * DFlushVolume outside of vol_glock_mutex... */
DFlushVolume(vp->hashid);
- VReleaseVnodeFiles_r(vp); /* releases the glock internally */
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+#endif
+
+ VReleaseVnodeFiles_r(vp); /* DAFS: releases the glock internally */
#ifdef AFS_DEMAND_ATTACH_FS
VOL_UNLOCK;
IH_CONDSYNC(vp->vnodeIndex[vLarge].handle);
IH_CONDSYNC(vp->vnodeIndex[vSmall].handle);
IH_CONDSYNC(vp->diskDataHandle);
-#ifdef AFS_NT40_ENV
+#ifdef AFS_NAMEI_ENV
IH_CONDSYNC(vp->linkHandle);
-#endif /* AFS_NT40_ENV */
+#endif /* AFS_NAMEI_ENV */
}
IH_RELEASE(vp->vnodeIndex[vLarge].handle);
#endif
*ec = 0;
- if (programType == fileServer)
- V_uniquifier(vp) =
- (V_inUse(vp) ? V_nextVnodeUnique(vp) +
- 200 : V_nextVnodeUnique(vp));
+ if (programType == fileServer) {
+ if (!V_inUse(vp)) {
+ V_uniquifier(vp) = V_nextVnodeUnique(vp);
+ } else {
+ V_uniquifier(vp) =
+ V_nextVnodeUnique(vp) + VOLUME_UPDATE_UNIQUIFIER_BUMP;
+ if (V_uniquifier(vp) < V_nextVnodeUnique(vp)) {
+ /* uniquifier rolled over; reset the counters */
+ V_nextVnodeUnique(vp) = 2; /* 1 is reserved for the root vnode */
+ V_uniquifier(vp) =
+ V_nextVnodeUnique(vp) + VOLUME_UPDATE_UNIQUIFIER_BUMP;
+ }
+ }
+ }
#ifdef AFS_DEMAND_ATTACH_FS
state_save = VChangeState_r(vp, VOL_STATE_UPDATING);
#endif
if (*ec) {
- Log("VUpdateVolume: error updating volume header, volume %u (%s)\n",
- V_id(vp), V_name(vp));
+ Log("VUpdateVolume: error updating volume header, volume %" AFS_VOLID_FMT " (%s)\n",
+ afs_printable_VolumeId_lu(V_id(vp)), V_name(vp));
/* try to update on-disk header,
* while preventing infinite recursion */
if (!(flags & VOL_UPDATE_NOFORCEOFF)) {
VOL_UNLOCK;
#endif
fdP = IH_OPEN(V_diskDataHandle(vp));
- osi_Assert(fdP != NULL);
+ opr_Assert(fdP != NULL);
code = FDH_SYNC(fdP);
- osi_Assert(code == 0);
+ opr_Assert(code == 0);
FDH_CLOSE(fdP);
#ifdef AFS_DEMAND_ATTACH_FS
VOL_LOCK;
V_inUse(vp) = 0;
VUpdateVolume_r(&ec, vp, VOL_UPDATE_NOFORCEOFF);
if (ec) {
- Log("VCheckDetach: volume header update for volume %u "
- "failed with errno %d\n", vp->hashid, errno);
+ Log("VCheckDetach: volume header update for volume %" AFS_VOLID_FMT " "
+ "failed with errno %d\n", afs_printable_VolumeId_lu(vp->hashid), errno);
}
}
VReleaseVolumeHandles_r(vp);
VCheckSalvage(vp);
ReallyFreeVolume(vp);
if (programType == fileServer) {
- CV_BROADCAST(&vol_put_volume_cond);
+ opr_cv_broadcast(&vol_put_volume_cond);
}
}
return ret;
V_inUse(vp) = 0;
VUpdateVolume_r(&ec, vp, VOL_UPDATE_NOFORCEOFF);
if (ec) {
- Log("VCheckDetach: volume header update for volume %u failed with errno %d\n",
- vp->hashid, errno);
+ Log("VCheckDetach: volume header update for volume %" AFS_VOLID_FMT " failed with errno %d\n",
+ afs_printable_VolumeId_lu(vp->hashid), errno);
}
}
VReleaseVolumeHandles_r(vp);
ReallyFreeVolume(vp);
if (programType == fileServer) {
#if defined(AFS_PTHREAD_ENV)
- CV_BROADCAST(&vol_put_volume_cond);
+ opr_cv_broadcast(&vol_put_volume_cond);
#else /* AFS_PTHREAD_ENV */
LWP_NoYieldSignal(VPutVolume);
#endif /* AFS_PTHREAD_ENV */
if (vp->goingOffline && !vp->nUsers) {
Error error;
- osi_Assert(programType == fileServer);
- osi_Assert((V_attachState(vp) != VOL_STATE_ATTACHED) &&
+ opr_Assert(programType == fileServer);
+ opr_Assert((V_attachState(vp) != VOL_STATE_ATTACHED) &&
(V_attachState(vp) != VOL_STATE_FREED) &&
(V_attachState(vp) != VOL_STATE_PREATTACHED) &&
(V_attachState(vp) != VOL_STATE_UNATTACHED) &&
VUpdateVolume_r(&error, vp, 0);
VCloseVolumeHandles_r(vp);
- if (LogLevel) {
+ if (GetLogLevel() != 0) {
if (V_offlineMessage(vp)[0]) {
Log("VOffline: Volume %lu (%s) is now offline (%s)\n",
afs_printable_uint32_lu(V_id(vp)), V_name(vp),
if (vp->goingOffline && !vp->nUsers) {
Error error;
- osi_Assert(programType == fileServer);
+ opr_Assert(programType == fileServer);
ret = 1;
vp->goingOffline = 0;
V_inUse(vp) = 0;
VUpdateVolume_r(&error, vp, 0);
VCloseVolumeHandles_r(vp);
- if (LogLevel) {
+ if (GetLogLevel() != 0) {
if (V_offlineMessage(vp)[0]) {
Log("VOffline: Volume %lu (%s) is now offline (%s)\n",
afs_printable_uint32_lu(V_id(vp)), V_name(vp),
}
FreeVolumeHeader(vp);
#ifdef AFS_PTHREAD_ENV
- CV_BROADCAST(&vol_put_volume_cond);
+ opr_cv_broadcast(&vol_put_volume_cond);
#else /* AFS_PTHREAD_ENV */
LWP_NoYieldSignal(VPutVolume);
#endif /* AFS_PTHREAD_ENV */
void
VCancelReservation_r(Volume * vp)
{
- osi_Assert(--vp->nWaiters >= 0);
+ opr_Verify(--vp->nWaiters >= 0);
if (vp->nWaiters == 0) {
VCheckOffline(vp);
if (!VCheckDetach(vp)) {
FSSYNC_VolOp_info * info;
/* attach a vol op info node to the volume struct */
- info = (FSSYNC_VolOp_info *) malloc(sizeof(FSSYNC_VolOp_info));
- osi_Assert(info != NULL);
+ info = malloc(sizeof(FSSYNC_VolOp_info));
+ opr_Assert(info != NULL);
memcpy(info, vopinfo, sizeof(FSSYNC_VolOp_info));
vp->pending_vol_op = info;
* @param[in] vp pointer to volume object
*
* @return status code
- * @retval 0 no salvage scheduled
- * @retval 1 a salvage has been scheduled with the salvageserver
+ * @retval VCHECK_SALVAGE_OK (0) no pending salvage
+ * @retval VCHECK_SALVAGE_SCHEDULED (1) salvage has been scheduled
+ * @retval VCHECK_SALVAGE_ASYNC (2) salvage being scheduled
+ * @retval VCHECK_SALVAGE_DENIED (3) salvage not scheduled; denied
+ * @retval VCHECK_SALVAGE_FAIL (4) salvage not scheduled; failed
*
* @pre VOL_LOCK is held
*
static int
VCheckSalvage(Volume * vp)
{
- int ret = 0;
+ int ret = VCHECK_SALVAGE_OK;
+
#if defined(SALVSYNC_BUILD_CLIENT) || defined(FSSYNC_BUILD_CLIENT)
- if (vp->nUsers)
- return ret;
if (!vp->salvage.requested) {
- return ret;
+ return VCHECK_SALVAGE_OK;
+ }
+ if (vp->nUsers) {
+ return VCHECK_SALVAGE_ASYNC;
}
/* prevent recursion; some of the code below creates and removes
* lightweight refs, which can call VCheckSalvage */
if (vp->salvage.scheduling) {
- return ret;
+ return VCHECK_SALVAGE_ASYNC;
}
vp->salvage.scheduling = 1;
if (V_attachState(vp) == VOL_STATE_SALVAGE_REQ) {
if (!VOfflineForSalvage_r(vp)) {
vp->salvage.scheduling = 0;
- return ret;
+ return VCHECK_SALVAGE_FAIL;
}
}
if (vp->salvage.requested) {
- VScheduleSalvage_r(vp);
- ret = 1;
+ ret = VScheduleSalvage_r(vp);
}
vp->salvage.scheduling = 0;
#endif /* SALVSYNC_BUILD_CLIENT || FSSYNC_BUILD_CLIENT */
* @param[in] flags see flags note below
*
* @note flags:
- * VOL_SALVAGE_INVALIDATE_HEADER causes volume header cache entry
- * to be invalidated.
+ * VOL_SALVAGE_NO_OFFLINE do not need to wait to offline the volume; it has
+ * not been fully attached
*
* @pre VOL_LOCK is held.
*
VOfflineForSalvage_r(vp);
}
}
+ /* If we are non-fileserver, we're telling the fileserver to
+ * salvage the vol, so we don't need to give it back separately. */
+ vp->needsPutBack = 0;
+
*ec = VSALVAGING;
} else {
- Log("VRequestSalvage: volume %u online salvaged too many times; forced offline.\n", vp->hashid);
+ Log("VRequestSalvage: volume %" AFS_VOLID_FMT " online salvaged too many times; forced offline.\n", afs_printable_VolumeId_lu(vp->hashid));
/* make sure neither VScheduleSalvage_r nor
* VUpdateSalvagePriority_r try to schedule another salvage */
*ec = VSALVAGE;
code = 1;
}
- if (flags & VOL_SALVAGE_INVALIDATE_HEADER) {
- /* Instead of ReleaseVolumeHeader, we do FreeVolumeHeader()
- so that the the next VAttachVolumeByVp_r() invocation
- of attach2() will pull in a cached header
- entry and fail, then load a fresh one from disk and attach
- it to the volume.
- */
+ if ((flags & VOL_SALVAGE_NO_OFFLINE)) {
+ /* Here, we free the header for the volume, but make sure to only
+ * do this if VOL_SALVAGE_NO_OFFLINE is specified. The reason for
+ * this requires a bit of explanation.
+ *
+     * Normally, the volume header will be freed when the volume goes
+     * offline. However, if VOL_SALVAGE_NO_OFFLINE has been
+ * specified, the volume was in the process of being attached when
+ * we discovered that it needed salvaging. Thus, the volume will
+ * never go offline, since it never went fully online in the first
+ * place. Specifically, we do not call VOfflineForSalvage_r above,
+ * and we never get rid of the volume via VPutVolume_r; the volume
+ * has not been initialized enough for those to work.
+ *
+ * So instead, explicitly free the volume header here. If we do not
+ * do this, we are wasting a header that some other volume could be
+ * using, since the header remains attached to the volume. Also if
+ * we do not free the header here, we end up with a volume where
+ * nUsers == 0, but the volume has a header that is not on the
+ * header LRU. Some code expects that all nUsers == 0 volumes have
+ * their header on the header LRU (or have no header).
+ *
+ * Also note that we must not free the volume header here if
+ * VOL_SALVAGE_NO_OFFLINE is not set. Since, if
+ * VOL_SALVAGE_NO_OFFLINE is not set, someone else may have a
+ * reference to this volume, and they assume they can use the
+ * volume's header. If we free the volume out from under them, they
+ * can easily segfault.
+ */
FreeVolumeHeader(vp);
}
}
try_SALVSYNC(Volume *vp, char *partName, int *code) {
#ifdef SALVSYNC_BUILD_CLIENT
if (VCanUseSALVSYNC()) {
- Log("Scheduling salvage for volume %lu on part %s over SALVSYNC\n",
- afs_printable_uint32_lu(vp->hashid), partName);
+ Log("Scheduling salvage for volume %" AFS_VOLID_FMT " on part %s over SALVSYNC\n",
+ afs_printable_VolumeId_lu(vp->hashid), partName);
/* can't use V_id() since there's no guarantee
* we have the disk data header at this point */
try_FSSYNC(Volume *vp, char *partName, int *code) {
#ifdef FSSYNC_BUILD_CLIENT
if (VCanUseFSSYNC()) {
- Log("Scheduling salvage for volume %lu on part %s over FSSYNC\n",
- afs_printable_uint32_lu(vp->hashid), partName);
+ Log("Scheduling salvage for volume %" AFS_VOLID_FMT " on part %s over FSSYNC\n",
+ afs_printable_VolumeId_lu(vp->hashid), partName);
/*
* If we aren't the fileserver, tell the fileserver the volume
* @param[in] vp pointer to volume object
*
* @return operation status
- * @retval 0 salvage scheduled successfully
- * @retval 1 salvage not scheduled, or SALVSYNC/FSSYNC com error
+ * @retval VCHECK_SALVAGE_OK (0) no pending salvage
+ * @retval VCHECK_SALVAGE_SCHEDULED (1) salvage has been scheduled
+ * @retval VCHECK_SALVAGE_ASYNC (2) salvage being scheduled
+ * @retval VCHECK_SALVAGE_DENIED (3) salvage not scheduled; denied
+ * @retval VCHECK_SALVAGE_FAIL (4) salvage not scheduled; failed
*
* @pre
* @arg VOL_LOCK is held.
static int
VScheduleSalvage_r(Volume * vp)
{
- int ret=0;
- int code;
+ int ret = VCHECK_SALVAGE_SCHEDULED;
+ int code = 0;
VolState state_save;
VThreadOptions_t * thread_opts;
char partName[16];
- osi_Assert(VCanUseSALVSYNC() || VCanUseFSSYNC());
+ opr_Verify(VCanUseSALVSYNC() || VCanUseFSSYNC());
if (vp->nWaiters || vp->nUsers) {
- return 1;
+ return VCHECK_SALVAGE_ASYNC;
}
/* prevent endless salvage,attach,salvage,attach,... loops */
- if (vp->stats.salvages >= SALVAGE_COUNT_MAX)
- return 1;
+ if (vp->stats.salvages >= SALVAGE_COUNT_MAX) {
+ return VCHECK_SALVAGE_FAIL;
+ }
/*
* don't perform salvsync ops on certain threads
thread_opts = &VThread_defaults;
}
if (thread_opts->disallow_salvsync || vol_disallow_salvsync) {
- return 1;
+ return VCHECK_SALVAGE_ASYNC;
}
if (vp->salvage.scheduled) {
- return ret;
+ return VCHECK_SALVAGE_SCHEDULED;
}
VCreateReservation_r(vp);
* XXX the scheduling process should really be done asynchronously
* to avoid fssync deadlocks
*/
- if (!vp->salvage.scheduled) {
+ if (vp->salvage.scheduled) {
+ ret = VCHECK_SALVAGE_SCHEDULED;
+ } else {
/* if we haven't previously scheduled a salvage, do so now
*
* set the volume to an exclusive state and drop the lock
* around the SALVSYNC call
*/
- strlcpy(partName, VPartitionPath(vp->partition), sizeof(partName));
+ strlcpy(partName, vp->partition->name, sizeof(partName));
state_save = VChangeState_r(vp, VOL_STATE_SALVSYNC_REQ);
VOL_UNLOCK;
- osi_Assert(try_SALVSYNC(vp, partName, &code) ||
- try_FSSYNC(vp, partName, &code));
+ opr_Verify(try_SALVSYNC(vp, partName, &code)
+ || try_FSSYNC(vp, partName, &code));
VOL_LOCK;
VChangeState_r(vp, state_save);
if (code == SYNC_OK) {
+ ret = VCHECK_SALVAGE_SCHEDULED;
vp->salvage.scheduled = 1;
vp->stats.last_salvage_req = FT_ApproxTime();
if (VCanUseSALVSYNC()) {
IncUInt64(&VStats.salvages);
}
} else {
- ret = 1;
switch(code) {
case SYNC_BAD_COMMAND:
case SYNC_COM_ERROR:
+ ret = VCHECK_SALVAGE_FAIL;
break;
case SYNC_DENIED:
- Log("VScheduleSalvage_r: Salvage request for volume %lu "
- "denied\n", afs_printable_uint32_lu(vp->hashid));
+ ret = VCHECK_SALVAGE_DENIED;
+ Log("VScheduleSalvage_r: Salvage request for volume %" AFS_VOLID_FMT " "
+ "denied\n", afs_printable_VolumeId_lu(vp->hashid));
+ break;
+ case SYNC_FAILED:
+ ret = VCHECK_SALVAGE_FAIL;
+ Log("VScheduleSalvage_r: Salvage request for volume %" AFS_VOLID_FMT " "
+ "failed\n", afs_printable_VolumeId_lu(vp->hashid));
break;
default:
- Log("VScheduleSalvage_r: Salvage request for volume %lu "
+ ret = VCHECK_SALVAGE_FAIL;
+ Log("VScheduleSalvage_r: Salvage request for volume %" AFS_VOLID_FMT " "
"received unknown protocol error %d\n",
- afs_printable_uint32_lu(vp->hashid), code);
+ afs_printable_VolumeId_lu(vp->hashid), code);
break;
}
* this, as the caller may reference vp without any refs. Instead, it
* is the duty of the caller to inspect 'vp' after we return to see if
* needs to be freed. */
- osi_Assert(--vp->nWaiters >= 0);
+ opr_Verify(--vp->nWaiters >= 0);
return ret;
}
#endif /* SALVSYNC_BUILD_CLIENT || FSSYNC_BUILD_CLIENT */
VConnectFS_r(void)
{
int rc;
- osi_Assert((VInit == 2) &&
+ opr_Assert((VInit == 2) &&
(programType != fileServer) &&
(programType != salvager));
rc = FSYNC_clientInit();
void
VDisconnectFS_r(void)
{
- osi_Assert((programType != fileServer) &&
+ opr_Assert((programType != fileServer) &&
(programType != salvager));
FSYNC_clientFinis();
VSetVInit_r(2);
/* volume bitmap routines */
/***************************************************/
+/*
+ * Grow a vnode index bitmap by the defined increment.
+ *
+ * Enlarges index->bitmap by VOLUME_BITMAP_GROWSIZE bytes, zeroes the
+ * newly-added region, and points bitmapOffset at the start of the new
+ * (all-free) region so the next allocation scan begins there.
+ *
+ * @param[in] index  vnode index whose bitmap should be enlarged
+ *
+ * @note aborts the process if the bitmap cannot be reallocated, matching
+ *       the assert-on-OOM policy used elsewhere in this file
+ */
+void
+VGrowBitmap(struct vnodeIndex *index)
+{
+    byte *bp;
+
+    /* use opr_Assert for consistency with the osi_Assert -> opr_Assert
+     * conversion applied throughout the rest of this file */
+    bp = realloc(index->bitmap, index->bitmapSize + VOLUME_BITMAP_GROWSIZE);
+    opr_Assert(bp != NULL);
+    index->bitmap = bp;
+
+    /* zero only the newly grown tail of the bitmap */
+    bp += index->bitmapSize;
+    memset(bp, 0, VOLUME_BITMAP_GROWSIZE);
+
+    /* free entries begin at the old end of the bitmap */
+    index->bitmapOffset = index->bitmapSize;
+    index->bitmapSize += VOLUME_BITMAP_GROWSIZE;
+}
+
/**
* allocate a vnode bitmap number for the vnode
*
VGetBitmap_r(ec, vp, i);
if (*ec) {
#ifdef AFS_DEMAND_ATTACH_FS
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0 /*flags*/);
#else /* AFS_DEMAND_ATTACH_FS */
DeleteVolumeFromHashTable(vp);
vp->shuttingDown = 1; /* Let who has it free it. */
index->bitmapOffset = (afs_uint32) (bp - index->bitmap);
while (*bp == 0xff)
bp++;
- o = ffs(~*bp) - 1; /* ffs is documented in BSTRING(3) */
+ o = opr_ffs(~*bp) - 1;
*bp |= (1 << o);
ret = ((bp - index->bitmap) * 8 + o);
#ifdef AFS_DEMAND_ATTACH_FS
bp += sizeof(bit32) /* i.e. 4 */ ;
}
/* No bit map entry--must grow bitmap */
- bp = (byte *)
- realloc(index->bitmap, index->bitmapSize + VOLUME_BITMAP_GROWSIZE);
- osi_Assert(bp != NULL);
- index->bitmap = bp;
- bp += index->bitmapSize;
- memset(bp, 0, VOLUME_BITMAP_GROWSIZE);
- index->bitmapOffset = index->bitmapSize;
- index->bitmapSize += VOLUME_BITMAP_GROWSIZE;
+ VGrowBitmap(index);
+ bp = index->bitmap + index->bitmapOffset;
*bp = 1;
ret = index->bitmapOffset * 8;
#ifdef AFS_DEMAND_ATTACH_FS
}
void
-VFreeBitMapEntry_r(Error * ec, struct vnodeIndex *index,
- unsigned bitNumber)
+VFreeBitMapEntry_r(Error * ec, Volume *vp, struct vnodeIndex *index,
+ unsigned bitNumber, int flags)
{
unsigned int offset;
*ec = 0;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (flags & VOL_FREE_BITMAP_WAIT) {
+ /* VAllocBitmapEntry_r allocs bitmap entries under an exclusive volume
+ * state, so ensure we're not in an exclusive volume state when we update
+ * the bitmap */
+ VCreateReservation_r(vp);
+ VWaitExclusiveState_r(vp);
+ }
+#endif
+
#ifdef BITMAP_LATER
if (!index->bitmap)
- return;
+ goto done;
#endif /* BITMAP_LATER */
+
offset = bitNumber >> 3;
if (offset >= index->bitmapSize) {
*ec = VNOVNODE;
- return;
+ goto done;
}
if (offset < index->bitmapOffset)
index->bitmapOffset = offset & ~3; /* Truncate to nearest bit32 */
*(index->bitmap + offset) &= ~(1 << (bitNumber & 0x7));
+
+ done:
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (flags & VOL_FREE_BITMAP_WAIT) {
+ VCancelReservation_r(vp);
+ }
+#endif
+ return; /* make the compiler happy for non-DAFS */
}
void
-VFreeBitMapEntry(Error * ec, struct vnodeIndex *index,
+VFreeBitMapEntry(Error * ec, Volume *vp, struct vnodeIndex *index,
unsigned bitNumber)
{
VOL_LOCK;
- VFreeBitMapEntry_r(ec, index, bitNumber);
+ VFreeBitMapEntry_r(ec, vp, index, bitNumber, VOL_FREE_BITMAP_WAIT);
VOL_UNLOCK;
}
VOL_UNLOCK;
fdP = IH_OPEN(vip->handle);
- osi_Assert(fdP != NULL);
+ opr_Assert(fdP != NULL);
file = FDH_FDOPEN(fdP, "r");
- osi_Assert(file != NULL);
- vnode = (VnodeDiskObject *) malloc(vcp->diskSize);
- osi_Assert(vnode != NULL);
+ opr_Assert(file != NULL);
+ vnode = malloc(vcp->diskSize);
+ opr_Assert(vnode != NULL);
size = OS_SIZE(fdP->fd_fd);
- osi_Assert(size != -1);
+ opr_Assert(size != -1);
nVnodes = (size <= vcp->diskSize ? 0 : size - vcp->diskSize)
>> vcp->logSize;
vip->bitmapSize = ((nVnodes / 8) + 10) / 4 * 4; /* The 10 is a little extra so
* it that way */
#ifdef BITMAP_LATER
BitMap = (byte *) calloc(1, vip->bitmapSize);
- osi_Assert(BitMap != NULL);
+ opr_Assert(BitMap != NULL);
#else /* BITMAP_LATER */
vip->bitmap = (byte *) calloc(1, vip->bitmapSize);
- osi_Assert(vip->bitmap != NULL);
+ opr_Assert(vip->bitmap != NULL);
vip->bitmapOffset = 0;
#endif /* BITMAP_LATER */
if (STREAM_ASEEK(file, vcp->diskSize) != -1) {
vip->bitmap = BitMap;
vip->bitmapOffset = 0;
} else
- free((byte *) BitMap);
+ free(BitMap);
#endif /* BITMAP_LATER */
#ifdef AFS_DEMAND_ATTACH_FS
VChangeState_r(vp, state_save);
*
*/
void
-VGetVolumePath(Error * ec, VolId volumeId, char **partitionp, char **namep)
+VGetVolumePath(Error * ec, VolumeId volumeId, char **partitionp, char **namep)
{
static char partition[VMAXPATHLEN], name[VMAXPATHLEN];
char path[VMAXPATHLEN];
struct DiskPartition64 *dp;
*ec = 0;
- name[0] = '/';
- (void)afs_snprintf(&name[1], (sizeof name) - 1, VFORMAT, afs_printable_uint32_lu(volumeId));
+ name[0] = OS_DIRSEPC;
+ snprintf(&name[1], (sizeof name) - 1, VFORMAT,
+ afs_printable_VolumeId_lu(volumeId));
for (dp = DiskPartitionList; dp; dp = dp->next) {
- struct afs_stat status;
+ struct afs_stat_st status;
strcpy(path, VPartitionPath(dp));
strcat(path, name);
if (afs_stat(path, &status) == 0) {
* @return volume number
*
* @note the string must be of the form VFORMAT. the only permissible
- * deviation is a leading '/' character.
+ * deviation is a leading OS_DIRSEPC character.
*
* @see VFORMAT
*/
int
VolumeNumber(char *name)
{
- if (*name == '/')
+ if (*name == OS_DIRSEPC)
name++;
- return atoi(name + 1);
+ return strtoul(name + 1, NULL, 10);
}
/**
VolumeExternalName(VolumeId volumeId)
{
static char name[VMAXPATHLEN];
- (void)afs_snprintf(name, sizeof name, VFORMAT, afs_printable_uint32_lu(volumeId));
+ snprintf(name, sizeof name, VFORMAT, afs_printable_VolumeId_lu(volumeId));
return name;
}
int
VolumeExternalName_r(VolumeId volumeId, char * name, size_t len)
{
- return afs_snprintf(name, len, VFORMAT, afs_printable_uint32_lu(volumeId));
+ return snprintf(name, len, VFORMAT, afs_printable_VolumeId_lu(volumeId));
}
/* Volume Usage Statistics routines */
/***************************************************/
-#if OPENAFS_VOL_STATS
#define OneDay (86400) /* 24 hours' worth of seconds */
-#else
-#define OneDay (24*60*60) /* 24 hours */
-#endif /* OPENAFS_VOL_STATS */
static time_t
Midnight(time_t t) {
V_dayUse(vp) = 0;
V_dayUseDate(vp) = Midnight(now);
-#if OPENAFS_VOL_STATS
/*
* All we need to do is bzero the entire VOL_STATS_BYTES of
* the detailed volume statistics area.
*/
memset((V_stat_area(vp)), 0, VOL_STATS_BYTES);
-#endif /* OPENAFS_VOL_STATS */
- }
+ }
/*It's been more than a day of collection */
/*
if (now - V_dayUseDate(vp) > OneDay)
VAdjustVolumeStatistics_r(vp);
/*
- * Save the volume header image to disk after every 128 bumps to dayUse.
+ * Save the volume header image to disk after a threshold of bumps to dayUse,
+ * at most every usage_rate_limit seconds.
*/
- if ((V_dayUse(vp)++ & 127) == 0) {
+ V_dayUse(vp)++;
+ vp->usage_bumps_outstanding++;
+ if (vp->usage_bumps_outstanding >= vol_opts.usage_threshold
+ && vp->usage_bumps_next_write <= now) {
Error error;
+ vp->usage_bumps_outstanding = 0;
+ vp->usage_bumps_next_write = now + vol_opts.usage_rate_limit;
VUpdateVolume_r(&error, vp, VOL_UPDATE_WAIT);
}
}
return;
if (UpdateList == NULL) {
updateSize = UPDATE_LIST_SIZE;
- UpdateList = (VolumeId *) malloc(sizeof(VolumeId) * updateSize);
+ UpdateList = malloc(sizeof(VolumeId) * updateSize);
} else {
if (nUpdatedVolumes == updateSize) {
updateSize <<= 1;
Log("warning: there is likely a bug in the volume update scanner\n");
return;
}
- UpdateList =
- (VolumeId *) realloc(UpdateList,
- sizeof(VolumeId) * updateSize);
+ UpdateList = realloc(UpdateList,
+ sizeof(VolumeId) * updateSize);
}
}
- osi_Assert(UpdateList != NULL);
+ opr_Assert(UpdateList != NULL);
UpdateList[nUpdatedVolumes++] = V_id(vp);
#endif /* !AFS_DEMAND_ATTACH_FS */
}
queue_Init(&volume_LRU.q[i]);
volume_LRU.q[i].len = 0;
volume_LRU.q[i].busy = 0;
- CV_INIT(&volume_LRU.q[i].cv, "vol lru", CV_DEFAULT, 0);
+ opr_cv_init(&volume_LRU.q[i].cv);
}
/* setup the timing constants */
VLRU_ComputeConstants();
- /* XXX put inside LogLevel check? */
+ /* XXX put inside log level check? */
Log("VLRU: starting scanner with the following configuration parameters:\n");
Log("VLRU: offlining volumes after minimum of %d seconds of inactivity\n", VLRU_offline_thresh);
Log("VLRU: running VLRU soft detach pass every %d seconds\n", VLRU_offline_interval);
/* start up the VLRU scanner */
volume_LRU.scanner_state = VLRU_SCANNER_STATE_OFFLINE;
if (programType == fileServer) {
- CV_INIT(&volume_LRU.cv, "vol lru", CV_DEFAULT, 0);
- osi_Assert(pthread_attr_init(&attrs) == 0);
- osi_Assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
- osi_Assert(pthread_create(&tid, &attrs, &VLRU_ScannerThread, NULL) == 0);
+ opr_cv_init(&volume_LRU.cv);
+ opr_Verify(pthread_attr_init(&attrs) == 0);
+ opr_Verify(pthread_attr_setdetachstate(&attrs,
+ PTHREAD_CREATE_DETACHED) == 0);
+ opr_Verify(pthread_create(&tid, &attrs,
+ &VLRU_ScannerThread, NULL) == 0);
}
}
if (!VLRU_enabled)
return;
- osi_Assert(queue_IsNotOnQueue(&vp->vlru));
+ opr_Assert(queue_IsNotOnQueue(&vp->vlru));
vp->vlru.idx = VLRU_QUEUE_INVALID;
}
if (queue_IsNotOnQueue(&vp->vlru))
return;
- osi_Assert(V_attachFlags(vp) & VOL_ON_VLRU);
+ opr_Assert(V_attachFlags(vp) & VOL_ON_VLRU);
/* update the access timestamp */
vp->stats.last_get = FT_ApproxTime();
/* check to see if we've been asked to pause */
if (volume_LRU.scanner_state == VLRU_SCANNER_STATE_PAUSING) {
volume_LRU.scanner_state = VLRU_SCANNER_STATE_PAUSED;
- CV_BROADCAST(&volume_LRU.cv);
+ opr_cv_broadcast(&volume_LRU.cv);
do {
VOL_CV_WAIT(&volume_LRU.cv);
} while (volume_LRU.scanner_state == VLRU_SCANNER_STATE_PAUSED);
/* signal that scanner is down */
volume_LRU.scanner_state = VLRU_SCANNER_STATE_OFFLINE;
- CV_BROADCAST(&volume_LRU.cv);
+ opr_cv_broadcast(&volume_LRU.cv);
VOL_UNLOCK;
return NULL;
}
Volume ** salv_flag_vec = NULL;
int salv_vec_offset = 0;
- osi_Assert(idx == VLRU_QUEUE_MID || idx == VLRU_QUEUE_OLD);
+ opr_Assert(idx == VLRU_QUEUE_MID || idx == VLRU_QUEUE_OLD);
/* get exclusive access to two chains, and drop the glock */
VLRU_Wait_r(&volume_LRU.q[idx-1]);
/* no big deal if this allocation fails */
if (volume_LRU.q[idx].len) {
- salv_flag_vec = (Volume **) malloc(volume_LRU.q[idx].len * sizeof(Volume *));
+ salv_flag_vec = malloc(volume_LRU.q[idx].len * sizeof(Volume *));
}
now = FT_ApproxTime();
Volume * vp;
int i, locked = 1;
- osi_Assert(idx == VLRU_QUEUE_NEW || idx == VLRU_QUEUE_CANDIDATE);
+ opr_Assert(idx == VLRU_QUEUE_NEW || idx == VLRU_QUEUE_CANDIDATE);
/* gain exclusive access to the idx VLRU */
VLRU_Wait_r(&volume_LRU.q[idx]);
idx = vp->vlru.idx;
- osi_Assert(idx == VLRU_QUEUE_NEW);
+ opr_Assert(idx == VLRU_QUEUE_NEW);
if (vp->stats.last_get <= thresh) {
/* move to candidate pool */
static void
VLRU_BeginExclusive_r(struct VLRU_q * q)
{
- osi_Assert(q->busy == 0);
+ opr_Assert(q->busy == 0);
q->busy = 1;
}
static void
VLRU_EndExclusive_r(struct VLRU_q * q)
{
- osi_Assert(q->busy);
+ opr_Assert(q->busy);
q->busy = 0;
- CV_BROADCAST(&q->cv);
+ opr_cv_broadcast(&q->cv);
}
/* wait for another thread to end exclusive access on VLRU */
afs_uint32 ts_save;
int ret = 0;
- osi_Assert(vp->vlru.idx == VLRU_QUEUE_CANDIDATE);
+ opr_Assert(vp->vlru.idx == VLRU_QUEUE_CANDIDATE);
ts_save = vp->stats.last_get;
if (ts_save > thresh)
vp = NULL;
} else {
/* pull it off the VLRU */
- osi_Assert(vp->vlru.idx == VLRU_QUEUE_CANDIDATE);
+ opr_Assert(vp->vlru.idx == VLRU_QUEUE_CANDIDATE);
volume_LRU.q[VLRU_QUEUE_CANDIDATE].len--;
queue_Remove(&vp->vlru);
vp->vlru.idx = VLRU_QUEUE_INVALID;
volume_hdr_LRU.stats.used = howMany;
volume_hdr_LRU.stats.attached = 0;
hp = (struct volHeader *)(calloc(howMany, sizeof(struct volHeader)));
- osi_Assert(hp != NULL);
+ opr_Assert(hp != NULL);
while (howMany--)
/* We are using ReleaseVolumeHeader to initialize the values on the header list
ReleaseVolumeHeader(hp++);
}
+/* get a volume header off of the volume header LRU.
+ *
+ * @return volume header
+ * @retval NULL no usable volume header is available on the LRU
+ *
+ * @pre VOL_LOCK held
+ *
+ * @post the returned header (if any) has been removed from the LRU queue;
+ *       ownership passes to the caller
+ *
+ * @post for DAFS, if the returned header is associated with a volume, that
+ *       volume is NOT in an exclusive state
+ *
+ * @internal volume package internal use only.
+ */
+#ifdef AFS_DEMAND_ATTACH_FS
+static struct volHeader*
+GetVolHeaderFromLRU(void)
+{
+    struct volHeader *hd = NULL, *qh, *nqh;
+    /* Usually, a volume in an exclusive state will not have its header on
+     * the LRU. However, it is possible for this to occur when a salvage
+     * request is received over FSSYNC, and possibly in other corner cases.
+     * So just skip over headers whose volumes are in an exclusive state. We
+     * could VWaitExclusiveState_r instead, but not waiting is faster and
+     * easier to do */
+    for (queue_Scan(&volume_hdr_LRU, qh, nqh, volHeader)) {
+        /* a header with no back-pointer is attached to no volume, and is
+         * always safe to hand out */
+        if (!qh->back || !VIsExclusiveState(V_attachState(qh->back))) {
+            queue_Remove(qh);
+            hd = qh;
+            break;
+        }
+    }
+    return hd;
+}
+#else /* AFS_DEMAND_ATTACH_FS */
+static struct volHeader*
+GetVolHeaderFromLRU(void)
+{
+    struct volHeader *hd = NULL;
+    /* non-DAFS: no exclusive volume states to skip over, so simply take
+     * the first header on the queue, if there is one */
+    if (queue_IsNotEmpty(&volume_hdr_LRU)) {
+        hd = queue_First(&volume_hdr_LRU, volHeader);
+        queue_Remove(hd);
+    }
+    return hd;
+}
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
/**
* get a volume header and attach it to the volume object.
*
if (programType != fileServer) {
/* for volume utilities, we allocate volHeaders as needed */
if (!vp->header) {
- hd = (struct volHeader *)calloc(1, sizeof(*vp->header));
- osi_Assert(hd != NULL);
+ hd = calloc(1, sizeof(*vp->header));
+ opr_Assert(hd != NULL);
vp->header = hd;
hd->back = vp;
#ifdef AFS_DEMAND_ATTACH_FS
* still available. pull it off the lru and return */
hd = vp->header;
queue_Remove(hd);
- osi_Assert(hd->back == vp);
+ opr_Assert(hd->back == vp);
#ifdef AFS_DEMAND_ATTACH_FS
V_attachFlags(vp) &= ~(VOL_HDR_IN_LRU);
#endif
} else {
- /* we need to grab a new element off the LRU */
- if (queue_IsNotEmpty(&volume_hdr_LRU)) {
- /* grab an element and pull off of LRU */
- hd = queue_First(&volume_hdr_LRU, volHeader);
- queue_Remove(hd);
- } else {
+ hd = GetVolHeaderFromLRU();
+ if (!hd) {
/* LRU is empty, so allocate a new volHeader
* this is probably indicative of a leak, so let the user know */
- hd = (struct volHeader *)calloc(1, sizeof(struct volHeader));
- osi_Assert(hd != NULL);
+ hd = calloc(1, sizeof(struct volHeader));
+ opr_Assert(hd != NULL);
if (!everLogged) {
Log("****Allocated more volume headers, probably leak****\n");
everLogged = 1;
* be sync'd out to disk */
#ifdef AFS_DEMAND_ATTACH_FS
- /* if hd->back were in an exclusive state, then
- * its volHeader would not be on the LRU... */
- osi_Assert(!VIsExclusiveState(V_attachState(hd->back)));
+ /* GetVolHeaderFromLRU had better not give us back a header
+ * with a volume in exclusive state... */
+ opr_Assert(!VIsExclusiveState(V_attachState(hd->back)));
#endif
if (hd->diskstuff.inUse) {
}
if (!VInit) {
- VolumeHashTable.Size = 1 << logsize;
- VolumeHashTable.Mask = VolumeHashTable.Size - 1;
+ VolumeHashTable.Size = opr_jhash_size(logsize);
+ VolumeHashTable.Mask = opr_jhash_mask(logsize);
} else {
/* we can't yet support runtime modification of this
* parameter. we'll need a configuration rwlock to
VolumeHashTable.Table = (VolumeHashChainHead *) calloc(VolumeHashTable.Size,
sizeof(VolumeHashChainHead));
- osi_Assert(VolumeHashTable.Table != NULL);
+ opr_Assert(VolumeHashTable.Table != NULL);
for (i=0; i < VolumeHashTable.Size; i++) {
queue_Init(&VolumeHashTable.Table[i]);
#ifdef AFS_DEMAND_ATTACH_FS
- CV_INIT(&VolumeHashTable.Table[i].chain_busy_cv, "vhash busy", CV_DEFAULT, 0);
+ opr_cv_init(&VolumeHashTable.Table[i].chain_busy_cv);
#endif /* AFS_DEMAND_ATTACH_FS */
}
}
* asynchronous hash chain reordering to finish.
*/
static void
-AddVolumeToHashTable(Volume * vp, int hashid)
+AddVolumeToHashTable(Volume * vp, VolumeId hashid)
{
VolumeHashChainHead * head;
head->len++;
vp->hashid = hashid;
queue_Append(head, vp);
- vp->vnodeHashOffset = VolumeHashOffset_r();
}
/**
* hint volume object.
*/
Volume *
-VLookupVolume_r(Error * ec, VolId volumeId, Volume * hint)
+VLookupVolume_r(Error * ec, VolumeId volumeId, Volume * hint)
{
int looks = 0;
Volume * vp, *np;
/* search the chain for this volume id */
for(queue_Scan(head, vp, np, Volume)) {
looks++;
- if ((vp->hashid == volumeId)) {
+ if (vp->hashid == volumeId) {
break;
}
}
static void
VHashBeginExclusive_r(VolumeHashChainHead * head)
{
- osi_Assert(head->busy == 0);
+ opr_Assert(head->busy == 0);
head->busy = 1;
}
static void
VHashEndExclusive_r(VolumeHashChainHead * head)
{
- osi_Assert(head->busy);
+ opr_Assert(head->busy);
head->busy = 0;
- CV_BROADCAST(&head->chain_busy_cv);
+ opr_cv_broadcast(&head->chain_busy_cv);
}
/**
static void
VVByPListBeginExclusive_r(struct DiskPartition64 * dp)
{
- osi_Assert(dp->vol_list.busy == 0);
+ opr_Assert(dp->vol_list.busy == 0);
dp->vol_list.busy = 1;
}
static void
VVByPListEndExclusive_r(struct DiskPartition64 * dp)
{
- osi_Assert(dp->vol_list.busy);
+ opr_Assert(dp->vol_list.busy);
dp->vol_list.busy = 0;
- CV_BROADCAST(&dp->vol_list.cv);
+ opr_cv_broadcast(&dp->vol_list.cv);
}
/**
void
VPrintCacheStats_r(void)
{
- afs_uint32 get_hi, get_lo, load_hi, load_lo;
struct VnodeClassInfo *vcp;
vcp = &VnodeClassInfo[vLarge];
Log("Large vnode cache, %d entries, %d allocs, %d gets (%d reads), %d writes\n", vcp->cacheSize, vcp->allocs, vcp->gets, vcp->reads, vcp->writes);
vcp = &VnodeClassInfo[vSmall];
Log("Small vnode cache,%d entries, %d allocs, %d gets (%d reads), %d writes\n", vcp->cacheSize, vcp->allocs, vcp->gets, vcp->reads, vcp->writes);
- SplitInt64(VStats.hdr_gets, get_hi, get_lo);
- SplitInt64(VStats.hdr_loads, load_hi, load_lo);
- Log("Volume header cache, %d entries, %d gets, %d replacements\n",
- VStats.hdr_cache_size, get_lo, load_lo);
+ Log("Volume header cache, %d entries, %"AFS_INT64_FMT" gets, "
+ "%"AFS_INT64_FMT" replacements\n",
+ VStats.hdr_cache_size, VStats.hdr_gets, VStats.hdr_loads);
}
void
#define ENUMTOSTRING(en) #en
#define ENUMCASE(en) \
- case en: \
- return ENUMTOSTRING(en); \
- break
+ case en: return ENUMTOSTRING(en)
static char *
vlru_idx_to_string(int idx)