2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
9 * Portions Copyright (c) 2006-2008 Sine Nomine Associates
15 Institution: The Information Technology Center, Carnegie-Mellon University
22 #include <afs/afssyscalls.h>
/*
 * Writeability tests.
 *
 * VolumeWriteable(vp)   - vp is a volume pointer; its type is read through
 *                         the V_type() accessor.
 * VolumeWriteable2(vol) - vol is an object (not a pointer) that has a
 *                         `type` member.
 *
 * Both evaluate to non-zero when the type equals readwriteVolume.  The
 * argument of VolumeWriteable2 is parenthesized so that expressions such as
 * `*p` or `arr[i]` can be passed safely.
 */
#define VolumeWriteable(vp)     (V_type(vp) == readwriteVolume)
#define VolumeWriteable2(vol)   ((vol).type == readwriteVolume)
27 typedef bit32 FileOffset; /* Offset in this file */
28 #define Date afs_uint32
29 #include "daemon_com.h"
33 /** turn this on if you suspect a volume package locking bug */
34 #define VOL_LOCK_DEBUG 1
38 #define VOL_LOCK_ASSERT_HELD \
39 osi_Assert(vol_glock_holder == pthread_self())
40 #define VOL_LOCK_ASSERT_UNHELD \
41 osi_Assert(vol_glock_holder == 0)
42 #define _VOL_LOCK_SET_HELD \
43 vol_glock_holder = pthread_self()
44 #define _VOL_LOCK_SET_UNHELD \
46 #define VOL_LOCK_DBG_CV_WAIT_END \
48 VOL_LOCK_ASSERT_UNHELD; \
51 #define VOL_LOCK_DBG_CV_WAIT_BEGIN \
53 VOL_LOCK_ASSERT_HELD; \
54 _VOL_LOCK_SET_UNHELD; \
57 #define VOL_LOCK_ASSERT_HELD
58 #define VOL_LOCK_ASSERT_UNHELD
59 #define VOL_LOCK_DBG_CV_WAIT_BEGIN
60 #define VOL_LOCK_DBG_CV_WAIT_END
64 #ifdef AFS_PTHREAD_ENV
66 extern pthread_mutex_t vol_glock_mutex;
67 extern pthread_mutex_t vol_trans_mutex;
68 extern pthread_cond_t vol_put_volume_cond;
69 extern pthread_cond_t vol_sleep_cond;
70 extern pthread_cond_t vol_vinit_cond;
71 extern ih_init_params vol_io_params;
72 extern int vol_attach_threads;
74 extern pthread_t vol_glock_holder;
77 MUTEX_ENTER(&vol_glock_mutex); \
78 VOL_LOCK_ASSERT_UNHELD; \
83 VOL_LOCK_ASSERT_HELD; \
84 _VOL_LOCK_SET_UNHELD; \
85 MUTEX_EXIT(&vol_glock_mutex); \
87 #define VOL_CV_WAIT(cv) \
89 VOL_LOCK_DBG_CV_WAIT_BEGIN; \
90 CV_WAIT((cv), &vol_glock_mutex); \
91 VOL_LOCK_DBG_CV_WAIT_END; \
93 #else /* !VOL_LOCK_DEBUG */
94 #define VOL_LOCK MUTEX_ENTER(&vol_glock_mutex)
95 #define VOL_UNLOCK MUTEX_EXIT(&vol_glock_mutex)
96 #define VOL_CV_WAIT(cv) CV_WAIT((cv), &vol_glock_mutex)
97 #endif /* !VOL_LOCK_DEBUG */
99 #define VSALVSYNC_LOCK MUTEX_ENTER(&vol_salvsync_mutex)
100 #define VSALVSYNC_UNLOCK MUTEX_EXIT(&vol_salvsync_mutex)
101 #define VTRANS_LOCK MUTEX_ENTER(&vol_trans_mutex)
102 #define VTRANS_UNLOCK MUTEX_EXIT(&vol_trans_mutex)
103 #else /* AFS_PTHREAD_ENV */
106 #define VSALVSYNC_LOCK
107 #define VSALVSYNC_UNLOCK
109 #define VTRANS_UNLOCK
110 #endif /* AFS_PTHREAD_ENV */
/**
 * volume package program type enumeration.
 *
 * The numeric values are assigned explicitly and start at 1.
 */
typedef enum {
    fileServer     = 1,    /**< the fileserver process */
    volumeUtility  = 2,    /**< any miscellaneous volume utility */
    salvager       = 3,    /**< standalone whole-partition salvager */
    salvageServer  = 4,    /**< dafs online salvager */
    debugUtility   = 5,    /**< fssync-debug or similar utility */
    volumeServer   = 6,    /**< the volserver process */
    volumeSalvager = 7     /**< the standalone single-volume salvager */
} ProgramType;
extern ProgramType programType;    /* The type of program using the package */
126 /* Some initialization parameters for the volume package */
127 /* Add new initialization parameters here */
128 extern int (*V_BreakVolumeCallbacks) (VolumeId);
129 extern int (*vol_PollProc) (void);
131 #define DOPOLL ((vol_PollProc)? (*vol_PollProc)() : 0)
#ifdef AFS_DEMAND_ATTACH_FS
/**
 * variable error return code based upon programType and DAFS presence
 *
 * With demand attach compiled in, the fileserver gets VSALVAGING and every
 * other program type gets VSALVAGE.
 */
#define DAFS_VSALVAGE   ((programType == fileServer) ? VSALVAGING : VSALVAGE)
#else
/* Without demand attach the code is always VSALVAGE. */
#define DAFS_VSALVAGE   (VSALVAGE)
#endif
142 struct versionStamp { /* Version stamp for critical volume files */
143 bit32 magic; /* Magic number */
144 bit32 version; /* Version number of this file, or software
145 * that created this file */
148 #ifdef AFS_DEMAND_ATTACH_FS
/**
 * demand attach volume state enumeration.
 *
 * @note values must be contiguous in order for VIsValidState() to work correctly
 */
typedef enum {
    VOL_STATE_UNATTACHED       = 0,  /**< volume is unattached */
    VOL_STATE_PREATTACHED      = 1,  /**< volume has been pre-attached */
    VOL_STATE_ATTACHING        = 2,  /**< volume is transitioning to fully attached */
    VOL_STATE_ATTACHED         = 3,  /**< volume has been fully attached */
    VOL_STATE_UPDATING         = 4,  /**< volume is updating on-disk structures */
    VOL_STATE_GET_BITMAP       = 5,  /**< volume is getting bitmap entries */
    VOL_STATE_HDR_LOADING      = 6,  /**< volume is loading disk header */
    VOL_STATE_HDR_ATTACHING    = 7,  /**< volume is getting a header from the LRU */
    VOL_STATE_SHUTTING_DOWN    = 8,  /**< volume is shutting down */
    VOL_STATE_GOING_OFFLINE    = 9,  /**< volume is going offline */
    VOL_STATE_OFFLINING        = 10, /**< volume is transitioning to offline */
    VOL_STATE_DETACHING        = 11, /**< volume is transitioning to detached */
    VOL_STATE_SALVSYNC_REQ     = 12, /**< volume is blocked on a salvsync request */
    VOL_STATE_SALVAGING        = 13, /**< volume is being salvaged */
    VOL_STATE_ERROR            = 14, /**< volume is in an error state */
    VOL_STATE_VNODE_ALLOC      = 15, /**< volume is busy allocating a new vnode */
    VOL_STATE_VNODE_GET        = 16, /**< volume is busy getting vnode disk data */
    VOL_STATE_VNODE_CLOSE      = 17, /**< volume is busy closing vnodes */
    VOL_STATE_VNODE_RELEASE    = 18, /**< volume is busy releasing vnodes */
    VOL_STATE_VLRU_ADD         = 19, /**< volume is busy being added to a VLRU queue */
    VOL_STATE_DELETED          = 20, /**< volume has been deleted by the volserver */
    VOL_STATE_SALVAGE_REQ      = 21, /**< volume has been requested to be salvaged,
                                      *   but is waiting for other users to go away
                                      *   so it can be offlined */
    VOL_STATE_SCANNING_RXCALLS = 22, /**< volume is scanning vp->rx_call_list
                                      *   to interrupt RX calls */
    /* please add new states directly above this line */
    VOL_STATE_FREED            = 23, /**< debugging aid */
    VOL_STATE_COUNT            = 24  /**< total number of valid states */
} VolState;
187 * V_attachFlags bits.
190 VOL_HDR_ATTACHED = 0x1, /**< volume header is attached to Volume struct */
191 VOL_HDR_LOADED = 0x2, /**< volume header contents are valid */
192 VOL_HDR_IN_LRU = 0x4, /**< volume header is in LRU */
193 VOL_IN_HASH = 0x8, /**< volume is in hash table */
194 VOL_ON_VBYP_LIST = 0x10, /**< volume is on VByP list */
195 VOL_IS_BUSY = 0x20, /**< volume is not to be free()d */
196 VOL_ON_VLRU = 0x40, /**< volume is on the VLRU */
197 VOL_HDR_DONTSALV = 0x80, /**< volume header DONTSALVAGE flag is set */
198 VOL_LOCKED = 0x100 /**< volume is disk-locked (@see VLockVolumeNB) */
/* VPrintExtendedCacheStats flags */
#define VOL_STATS_PER_CHAIN   0x1  /**< compute simple per-chain stats */
#define VOL_STATS_PER_CHAIN2  0x2  /**< compute per-chain stats that require scanning
                                    *   every element of the chain */

/* VLRU_SetOptions options */
#define VLRU_SET_THRESH       1
#define VLRU_SET_INTERVAL     2
#define VLRU_SET_MAX          3
#define VLRU_SET_ENABLED      4
/**
 * VLRU queue identifiers.
 */
typedef enum {
    VLRU_QUEUE_NEW       = 0,  /**< LRU queue for new volumes */
    VLRU_QUEUE_MID       = 1,  /**< survivor generation */
    VLRU_QUEUE_OLD       = 2,  /**< old generation */
    VLRU_QUEUE_CANDIDATE = 3,  /**< soft detach candidate pool */
    VLRU_QUEUE_HELD      = 4,  /**< volumes which are not allowed
                                *   to be soft detached */
    VLRU_QUEUE_INVALID   = 5   /**< invalid queue id */
} VLRUQueueName;
/* default scanner timing parameters */
#define VLRU_DEFAULT_OFFLINE_THRESH    (60*60*2)  /* 2 hours, in seconds */
#define VLRU_DEFAULT_OFFLINE_INTERVAL  (60*2)     /* 2 minutes, in seconds */
#define VLRU_DEFAULT_OFFLINE_MAX       8          /* 8 volumes */
/**
 * DAFS thread-specific options structure
 */
typedef struct VThreadOptions {
    int disallow_salvsync;  /**< whether or not salvsync calls are allowed
                             *   on this thread (deadlock prevention). */
} VThreadOptions_t;
238 extern pthread_key_t VThread_key;
239 extern VThreadOptions_t VThread_defaults;
241 #endif /* AFS_DEMAND_ATTACH_FS */
243 typedef struct VolumePackageOptions {
244 afs_uint32 nLargeVnodes; /**< size of large vnode cache */
245 afs_uint32 nSmallVnodes; /**< size of small vnode cache */
246 afs_uint32 volcache; /**< size of volume header cache */
248 afs_int32 canScheduleSalvage; /**< can we schedule salvages? (DAFS) */
249 /* (if 'no', we will just error out if we
251 afs_int32 canUseFSSYNC; /**< can we use the FSSYNC channel? */
252 afs_int32 canUseSALVSYNC; /**< can we use the SALVSYNC channel? (DAFS) */
253 afs_int32 unsafe_attach; /**< can we bypass checking the inUse vol
254 * header on attach? */
255 void (*interrupt_rxcall) (struct rx_call *call, afs_int32 error);
256 /**< callback to interrupt RX calls accessing
257 * a going-offline volume */
258 afs_int32 offline_timeout; /**< how long (in seconds) to wait before
259 * interrupting RX calls accessing a
260 * going-offline volume. -1 disables,
261 * 0 means immediately. */
262 afs_int32 offline_shutdown_timeout;
263 /**< how long (in seconds) to wait before
264 * interrupting RX calls accessing a
265 * going-offline volume during shutdown.
266 * -1 disables, 0 means immediately.
267 * Note that the timeout time is calculated
268 * once, when we encounter the first going-
269 * offline volume during shutdown. So if we
270 * encounter multiple going-offline volumes
271 * during shutdown, we will still only wait
272 * for this amount of time in total, not e.g.
273 * for each going-offline volume encountered. */
274 afs_int32 usage_threshold; /*< number of accesses before writing volume header */
275 afs_int32 usage_rate_limit; /*< minimum number of seconds before writing volume
276 * header, after usage_threshold is exceeded */
277 } VolumePackageOptions;
/* Magic numbers and version stamps for each type of file */
#define VOLUMEHEADERMAGIC     ((bit32)0x88a1bb3c)
#define VOLUMEINFOMAGIC       ((bit32)0x78a1b2c5)
#define SMALLINDEXMAGIC       0x99776655
#define LARGEINDEXMAGIC       0x88664433
#define MOUNTMAGIC            0x9a8b7c6d
#define ACLMAGIC              0x88877712
#define LINKTABLEMAGIC        0x99877712

/*
 * NOTE(review): no version constant paired with ACLMAGIC is visible in this
 * part of the file; confirm whether one is expected here.
 */
#define VOLUMEHEADERVERSION   1
#define VOLUMEINFOVERSION     1
#define SMALLINDEXVERSION     1
#define LARGEINDEXVERSION     1
#define MOUNTVERSION          1
#define LINKTABLEVERSION      1
/*
 * Define whether we are keeping detailed statistics on volume dealings.
 */
#define OPENAFS_VOL_STATS 1

#if OPENAFS_VOL_STATS
/*
 * Define various indices and counts used in keeping volume-level statistics.
 */
#define VOL_STATS_NUM_RWINFO_FIELDS 4

#define VOL_STATS_SAME_NET          0   /* Within same site (total) */
#define VOL_STATS_SAME_NET_AUTH     1   /* Within same site (authenticated);
                                         * (must be 1 more than above) */
#define VOL_STATS_DIFF_NET          2   /* From external site (total) */
#define VOL_STATS_DIFF_NET_AUTH     3   /* From external site (authenticated)
                                         * (must be 1 more than above) */

#define VOL_STATS_NUM_TIME_RANGES   6

#define VOL_STATS_TIME_CAP_0        60      /* 60 seconds */
#define VOL_STATS_TIME_CAP_1        600     /* 10 minutes, in seconds */
#define VOL_STATS_TIME_CAP_2        3600    /* 1 hour, in seconds */
#define VOL_STATS_TIME_CAP_3        86400   /* 1 day, in seconds */
#define VOL_STATS_TIME_CAP_4        604800  /* 1 week, in seconds */

#define VOL_STATS_NUM_TIME_FIELDS   6

#define VOL_STATS_TIME_IDX_0        0   /* 0 secs to 60 secs */
#define VOL_STATS_TIME_IDX_1        1   /* 1 min to 10 mins */
#define VOL_STATS_TIME_IDX_2        2   /* 10 mins to 60 mins */
#define VOL_STATS_TIME_IDX_3        3   /* 1 hr to 24 hrs */
#define VOL_STATS_TIME_IDX_4        4   /* 1 day to 7 days */
#define VOL_STATS_TIME_IDX_5        5   /* Greater than 1 week */
#endif /* OPENAFS_VOL_STATS */
332 /* Volume header. This is the contents of the named file representing
333 * the volume. Read-only by the file server!
335 typedef struct VolumeHeader {
336 struct versionStamp stamp; /* Must be first field */
337 VolumeId id; /* Volume number */
338 VolumeId parent; /* Read-write volume number (or this volume
339 * number if this is a read-write volume) */
341 Inode smallVnodeIndex;
342 Inode largeVnodeIndex;
344 Inode volumeMountTable;
349 typedef struct VolumeDiskHeader {
350 struct versionStamp stamp; /* Must be first field */
351 VolumeId id; /* Volume number */
352 VolumeId parent; /* Read-write volume number (or this volume
353 * number if this is a read-write volume) */
354 afs_int32 volumeInfo_lo;
355 afs_int32 smallVnodeIndex_lo;
356 afs_int32 largeVnodeIndex_lo;
357 afs_int32 volumeAcl_lo;
358 afs_int32 volumeMountTable_lo;
359 afs_int32 volumeInfo_hi;
360 afs_int32 smallVnodeIndex_hi;
361 afs_int32 largeVnodeIndex_hi;
362 afs_int32 volumeAcl_hi;
363 afs_int32 volumeMountTable_hi;
364 afs_int32 linkTable_lo;
365 afs_int32 linkTable_hi;
366 /* If you add fields, add them before here and reduce the size of array */
368 } VolumeDiskHeader_t;
370 /* A vnode index file header */
371 struct IndexFileHeader {
372 struct versionStamp stamp;
376 /******************************************************************************/
377 /* Volume Data which is stored on disk and can also be maintained in memory. */
378 /******************************************************************************/
379 typedef struct VolumeDiskData {
380 struct versionStamp stamp; /* Must be first field */
381 VolumeId id; /* Volume id--unique over all systems */
382 #define VNAMESIZE 32 /* including 0 byte */
383 char name[VNAMESIZE]; /* Unofficial name for the volume */
384 byte inUse; /* Volume is being used (perhaps it is online),
385 * or the system crashed while it was used */
386 byte inService; /* Volume in service, not necessarily on line
387 * This bit is set by an operator/system
388 * programmer. Manually taking a volume offline
389 * always clears the inService bit. Taking
390 * it out of service also takes it offline */
391 byte blessed; /* Volume is administratively blessed with
392 * the ability to go on line. Set by a system
393 * administrator. Clearing this bit will
394 * take the volume offline */
395 byte needsSalvaged; /* Volume needs salvaged--an unrecoverable
396 * error occurred to the volume. Note: a volume
397 * may still require salvage even if this
398 * flag isn't set--e.g. if a system crash
399 * occurred while the volume was on line. */
400 bit32 uniquifier; /* Next vnode uniquifier for this volume */
402 VolId parentId; /* Id of parent, if type==readonly */
403 VolId cloneId; /* Latest read-only clone, if type==readwrite,
404 * 0 if the volume has never been cloned. Note: the
405 * indicated volume does not necessarily exist (it
406 * may have been deleted since cloning). */
407 VolId backupId; /* Latest backup copy of this read write volume */
408 VolId restoredFromId; /* The id in the dump this volume was restored from--used simply
409 * to make sure that an incremental dump is not restored on top
410 * of something inappropriate: Note: this field itself is NEVER
412 byte needsCallback; /* Set by the salvager if anything was changed
413 * about the volume. Note: this is not set by
414 * clone/makebackups when setting the copy-on-write
415 * flag in directories; this flag is not seen by
417 #define DESTROY_ME 0xD3
418 byte destroyMe; /* If this is set to DESTROY_ME, then the salvager should destroy
419 * this volume; it is bogus (left over from an aborted volume move,
420 * for example). Note: if this flag is on, then inService should
421 * be OFF--only the salvager checks this flag */
422 #ifdef ALPHA_DUX40_ENV
423 #define DONT_SALVAGE 0xE6
424 #else /* ALPHA_DUX40_ENV */
425 #define DONT_SALVAGE 0xE5
426 #endif /* ALPHA_DUX40_ENV */
427 byte dontSalvage; /* If this is on, then don't bother salvaging this volume */
433 /* Administrative stuff */
434 int maxquota; /* Quota maximum, 1K blocks */
435 int minquota; /* Quota minimum, 1K blocks */
436 int maxfiles; /* Maximum number of files (i.e. inodes) */
437 bit32 accountNumber; /* Uninterpreted account number */
438 bit32 owner; /* The person administratively responsible
440 int reserved2[8]; /* Other administrative constraints */
442 /* Resource usage & statistics */
443 int filecount; /* Actual number of files */
444 int diskused; /* Actual disk space used, 1K blocks */
445 int dayUse; /* Metric for today's usage of this volume so far */
446 int weekUse[7]; /* Usage of the volume for the last week.
447 * weekUse[0] is for most recent complete 24 hour period
448 * of measurement; week[6] is 7 days ago */
449 Date dayUseDate; /* Date the dayUse statistics refer to; the week use stats
450 * are the preceding 7 days */
451 unsigned int volUpdateCounter; /*incremented at every update of volume*/
452 int reserved3[10]; /* Other stats here */
454 /* Server supplied dates */
455 Date creationDate; /* Creation date for a read/write
456 * volume; cloning date for original copy of
457 * a readonly volume (replicated volumes have
458 * the same creation date) */
459 Date accessDate; /* Last access time by a user, large granularity */
460 Date updateDate; /* Last modification by user */
461 Date expirationDate; /* 0 if it never expires */
462 Date backupDate; /* last time a backup clone was taken */
464 /* Time that this copy of this volume was made. NEVER backed up. This field is only
465 * set when the copy is created */
468 #if OPENAFS_VOL_STATS
469 bit32 stat_initialized; /*Are the stat fields below set up? */
473 #endif /* OPENAFS_VOL_STATS */
477 char offlineMessage[VMSGSIZE]; /* Why the volume is offline */
478 #if OPENAFS_VOL_STATS
479 #define VOL_STATS_BYTES 128
481 * Keep per-volume aggregate statistics on type and distance of access,
482 * along with authorship info.
484 bit32 stat_reads[VOL_STATS_NUM_RWINFO_FIELDS];
485 bit32 stat_writes[VOL_STATS_NUM_RWINFO_FIELDS];
486 bit32 stat_fileSameAuthor[VOL_STATS_NUM_TIME_FIELDS];
487 bit32 stat_fileDiffAuthor[VOL_STATS_NUM_TIME_FIELDS];
488 bit32 stat_dirSameAuthor[VOL_STATS_NUM_TIME_FIELDS];
489 bit32 stat_dirDiffAuthor[VOL_STATS_NUM_TIME_FIELDS];
491 char motd[VMSGSIZE]; /* Volume "message of the day" */
492 #endif /* OPENAFS_VOL_STATS */
497 /**************************************/
498 /* Memory resident volume information */
499 /**************************************/
502 * global volume package stats.
504 typedef struct VolPkgStats {
505 #ifdef AFS_DEMAND_ATTACH_FS
508 * extended volume package statistics
512 afs_uint32 state_levels[VOL_STATE_COUNT]; /**< volume state transition counters */
515 afs_uint64 hash_looks; /**< number of hash chain element traversals */
516 afs_uint64 hash_reorders; /**< number of hash chain reorders */
517 afs_uint64 salvages; /**< online salvages since fileserver start */
518 afs_uint64 vol_ops; /**< volume operations since fileserver start */
519 #endif /* AFS_DEMAND_ATTACH_FS */
521 afs_uint64 hdr_loads; /**< header loads from disk */
522 afs_uint64 hdr_gets; /**< header pulls out of LRU */
523 afs_uint64 attaches; /**< volume attaches since fileserver start */
524 afs_uint64 soft_detaches; /**< soft detach ops since fileserver start */
526 /* configuration parameters */
527 afs_uint32 hdr_cache_size; /**< size of volume header cache */
529 extern VolPkgStats VStats;
532 * volume header cache supporting structures
534 struct volume_hdr_LRU_stats {
540 struct volume_hdr_LRU_t {
542 struct volume_hdr_LRU_stats stats;
544 extern struct volume_hdr_LRU_t volume_hdr_LRU;
547 * volume hash chain supporting structures
549 typedef struct VolumeHashChainHead {
550 struct rx_queue queue;
552 /* someday we could put a per-chain lock here... */
553 #ifdef AFS_DEMAND_ATTACH_FS
557 /* per-chain statistics */
562 pthread_cond_t chain_busy_cv;
563 #endif /* AFS_DEMAND_ATTACH_FS */
564 } VolumeHashChainHead;
566 typedef struct VolumeHashTable {
569 VolumeHashChainHead * Table;
571 extern VolumeHashTable_t VolumeHashTable;
573 struct VolumeHashChainStats {
574 afs_int32 table_size;
576 #ifdef AFS_DEMAND_ATTACH_FS
577 afs_int32 chain_cacheCheck;
578 afs_int32 chain_busy;
579 afs_uint64 chain_looks;
580 afs_uint64 chain_gets;
581 afs_uint64 chain_reorders;
586 #ifdef AFS_DEMAND_ATTACH_FS
588 * DAFS extended per-volume statistics.
590 * @note this data lives across the entire
591 * lifetime of the fileserver process
593 typedef struct VolumeStats {
595 afs_uint64 hash_lookups; /**< hash table lookups */
596 afs_uint64 hash_short_circuits; /**< short circuited hash lookups (due to cacheCheck) */
597 afs_uint64 hdr_loads; /**< header loads from disk */
598 afs_uint64 hdr_gets; /**< header pulls out of LRU */
599 afs_uint16 attaches; /**< attaches of this volume since fileserver start */
600 afs_uint16 soft_detaches; /**< soft detaches of this volume */
601 afs_uint16 salvages; /**< online salvages since fileserver start */
602 afs_uint16 vol_ops; /**< volume operations since fileserver start */
605 afs_uint32 last_attach; /**< unix timestamp of last VAttach */
606 afs_uint32 last_get; /**< unix timestamp of last VGet/VHold */
607 afs_uint32 last_promote; /**< unix timestamp of last VLRU promote/demote */
608 afs_uint32 last_hdr_get; /**< unix timestamp of last GetVolumeHeader() */
609 afs_uint32 last_hdr_load; /**< unix timestamp of last LoadVolumeHeader() */
610 afs_uint32 last_salvage; /**< unix timestamp of last initiation of an online salvage */
611 afs_uint32 last_salvage_req; /**< unix timestamp of last SALVSYNC request */
612 afs_uint32 last_vol_op; /**< unix timestamp of last volume operation */
/** number of seconds between prio updates */
#define SALVAGE_PRIO_UPDATE_INTERVAL 3

/**
 * number of online salvages we allow before moving the volume
 * into a permanent error state
 *
 * once this threshold is reached, the operator will have to manually
 * issue a 'bos salvage' to bring the volume back online
 */
#define SALVAGE_COUNT_MAX 16
628 * DAFS online salvager state.
630 typedef struct VolumeOnlineSalvage {
631 afs_uint32 prio; /**< number of VGetVolume's since salvage requested */
632 int reason; /**< reason for requesting online salvage */
633 byte requested; /**< flag specifying that salvage should be scheduled */
634 byte scheduled; /**< flag specifying whether online salvage scheduled */
635 byte scheduling; /**< if nonzero, this volume has entered
636 * VCheckSalvage(), so if we recurse into
637 * VCheckSalvage() with this set, exit immediately
638 * to avoid recursing forever */
639 byte reserved[1]; /**< padding */
640 } VolumeOnlineSalvage;
643 * DAFS Volume LRU state.
645 typedef struct VolumeVLRUState {
646 struct rx_queue lru; /**< VLRU queue for this generation */
647 VLRUQueueName idx; /**< VLRU generation index */
649 #endif /* AFS_DEMAND_ATTACH_FS */
652 * node for a volume's rx_call_list.
656 struct rx_call *call;
659 typedef struct Volume {
660 struct rx_queue q; /* Volume hash chain pointers */
661 VolumeId hashid; /* Volume number -- for hash table lookup */
662 struct volHeader *header; /* Cached disk data */
663 Device device; /* Unix device for the volume */
664 struct DiskPartition64
665 *partition; /* Information about the Unix partition */
667 IHandle_t *handle; /* Unix inode holding this index */
668 byte *bitmap; /* Index bitmap */
669 afs_uint32 bitmapSize; /* length of bitmap, in bytes */
670 afs_uint32 bitmapOffset; /* Which byte address of the first long to
671 * start search from in bitmap */
672 } vnodeIndex[nVNODECLASSES];
673 IHandle_t *linkHandle;
674 Unique nextVnodeUnique; /* Derived originally from volume uniquifier.
675 * This is the actual next version number to
676 * assign; the uniquifier is bumped by 200 and
677 * written to disk every 200 file creates
678 * If the volume is shutdown gracefully, the
679 * uniquifier should be rewritten with the
680 * value nextVnodeVersion */
681 IHandle_t *diskDataHandle; /* Unix inode holding general volume info */
682 bit16 vnodeHashOffset; /* Computed by HashOffset function in vnode.h.
683 * Assigned to the volume when initialized.
684 * Added to vnode number for hash table index */
685 byte shuttingDown; /* This volume is going to be detached */
686 byte goingOffline; /* This volume is going offline */
687 bit32 cacheCheck; /* Online sequence number to be used to invalidate vnode cache entries
688 * that stayed around while a volume was offline */
689 short nUsers; /* Number of users of this volume header */
690 #define VOL_PUTBACK 1
691 #define VOL_PUTBACK_DELETE 2
692 byte needsPutBack; /* For a volume utility, this flag is set to VOL_PUTBACK if we
693 * need to give the volume back when we detach it. The server has
694 * certain modes where it doesn't detach the volume, and
695 * if we give it back spuriously, the server aborts. If set to
696 * VOL_PUTBACK_DELETE, it indicates that we need to tell the
697 * fileserver that the volume is gone entirely, instead of just
698 * giving the volume back to the fileserver. This field
699 * is meaningless on the file server */
700 byte specialStatus; /* An error code to return on VGetVolume: the
701 * volume is unavailable for the reason quoted,
702 * currently VBUSY or VMOVED */
703 afs_uint32 checkoutMode; /* for volume utilities, mode number for current checkout */
704 afs_uint32 updateTime; /* Time that this volume was put on the updated
705 * volume list--the list of volumes that will be
706 * salvaged should the file server crash */
707 struct rx_queue vnode_list; /**< linked list of cached vnodes for this volume */
708 struct rx_queue rx_call_list; /**< linked list of split RX calls using this
709 * volume (fileserver only) */
710 #ifdef AFS_DEMAND_ATTACH_FS
711 VolState attach_state; /* what stage of attachment has been completed */
712 afs_uint32 attach_flags; /* flags related to attachment state */
713 pthread_cond_t attach_cv; /* state change condition variable */
714 short nWaiters; /* volume package internal ref count */
715 int chainCacheCheck; /* Volume hash chain cache check */
716 struct rx_queue vol_list; /* per-partition volume list (VByPList) */
718 VolumeOnlineSalvage salvage; /* online salvager state */
719 VolumeStats stats; /* per-volume statistics */
720 VolumeVLRUState vlru; /* state specific to the VLRU */
721 FSSYNC_VolOp_info * pending_vol_op; /* fssync command info for any pending vol ops */
722 #endif /* AFS_DEMAND_ATTACH_FS */
723 int usage_bumps_outstanding; /**< to rate limit the usage update i/o by accesses */
724 int usage_bumps_next_write; /**< to rate limit the usage update i/o by time */
729 VolumeDiskData diskstuff; /* General volume info read from disk */
730 Volume *back; /* back pointer to current volume structure */
/* These macros are used to export fields within the volume header.  This was added
   to facilitate changing the actual representation */

/* Fields stored directly in the Volume structure; vp is a Volume pointer. */
#define V_device(vp)            ((vp)->device)
#define V_partition(vp)         ((vp)->partition)
#define V_diskDataHandle(vp)    ((vp)->diskDataHandle)
#define V_vnodeIndex(vp)        ((vp)->vnodeIndex)
#define V_nextVnodeUnique(vp)   ((vp)->nextVnodeUnique)
#define V_linkHandle(vp)        ((vp)->linkHandle)
#define V_checkoutMode(vp)      ((vp)->checkoutMode)
#ifdef AFS_DEMAND_ATTACH_FS
#define V_attachState(vp)       ((vp)->attach_state)
#define V_attachFlags(vp)       ((vp)->attach_flags)
#define V_attachCV(vp)          ((vp)->attach_cv)
#endif /* AFS_DEMAND_ATTACH_FS */

/*
 * Fields reached through vp->header->diskstuff.  Each expands to an lvalue,
 * so the macros can be used for both reading and assignment; vp->header must
 * be a valid pointer when they are evaluated.
 */
/* N.B. V_id must be this, rather than vp->id, or some programs will break, probably */
#define V_stamp(vp)             ((vp)->header->diskstuff.stamp)
#define V_id(vp)                ((vp)->header->diskstuff.id)
#define V_name(vp)              ((vp)->header->diskstuff.name)
#define V_inUse(vp)             ((vp)->header->diskstuff.inUse)
#define V_inService(vp)         ((vp)->header->diskstuff.inService)
#define V_blessed(vp)           ((vp)->header->diskstuff.blessed)
#define V_needsSalvaged(vp)     ((vp)->header->diskstuff.needsSalvaged)
#define V_uniquifier(vp)        ((vp)->header->diskstuff.uniquifier)
#define V_type(vp)              ((vp)->header->diskstuff.type)
#define V_parentId(vp)          ((vp)->header->diskstuff.parentId)
#define V_cloneId(vp)           ((vp)->header->diskstuff.cloneId)
#define V_backupId(vp)          ((vp)->header->diskstuff.backupId)
#define V_restoredFromId(vp)    ((vp)->header->diskstuff.restoredFromId)
#define V_needsCallback(vp)     ((vp)->header->diskstuff.needsCallback)
#define V_destroyMe(vp)         ((vp)->header->diskstuff.destroyMe)
#define V_dontSalvage(vp)       ((vp)->header->diskstuff.dontSalvage)
#define V_maxquota(vp)          ((vp)->header->diskstuff.maxquota)
#define V_minquota(vp)          ((vp)->header->diskstuff.minquota)
#define V_maxfiles(vp)          ((vp)->header->diskstuff.maxfiles)
#define V_accountNumber(vp)     ((vp)->header->diskstuff.accountNumber)
#define V_owner(vp)             ((vp)->header->diskstuff.owner)
#define V_filecount(vp)         ((vp)->header->diskstuff.filecount)
#define V_diskused(vp)          ((vp)->header->diskstuff.diskused)
#define V_dayUse(vp)            ((vp)->header->diskstuff.dayUse)
#define V_weekUse(vp)           ((vp)->header->diskstuff.weekUse)
#define V_dayUseDate(vp)        ((vp)->header->diskstuff.dayUseDate)
#define V_creationDate(vp)      ((vp)->header->diskstuff.creationDate)
#define V_accessDate(vp)        ((vp)->header->diskstuff.accessDate)
#define V_updateDate(vp)        ((vp)->header->diskstuff.updateDate)
#define V_expirationDate(vp)    ((vp)->header->diskstuff.expirationDate)
#define V_backupDate(vp)        ((vp)->header->diskstuff.backupDate)
#define V_copyDate(vp)          ((vp)->header->diskstuff.copyDate)
#define V_offlineMessage(vp)    ((vp)->header->diskstuff.offlineMessage)
#define V_disk(vp)              ((vp)->header->diskstuff)
#define V_motd(vp)              ((vp)->header->diskstuff.motd)
#if OPENAFS_VOL_STATS
/* V_stat_area() names the first statistics array (stat_reads). */
#define V_stat_initialized(vp)  ((vp)->header->diskstuff.stat_initialized)
#define V_stat_area(vp)         (((vp)->header->diskstuff.stat_reads))
#define V_stat_reads(vp, idx)   (((vp)->header->diskstuff.stat_reads)[idx])
#define V_stat_writes(vp, idx)  (((vp)->header->diskstuff.stat_writes)[idx])
#define V_stat_fileSameAuthor(vp, idx) (((vp)->header->diskstuff.stat_fileSameAuthor)[idx])
#define V_stat_fileDiffAuthor(vp, idx) (((vp)->header->diskstuff.stat_fileDiffAuthor)[idx])
#define V_stat_dirSameAuthor(vp, idx)  (((vp)->header->diskstuff.stat_dirSameAuthor)[idx])
#define V_stat_dirDiffAuthor(vp, idx)  (((vp)->header->diskstuff.stat_dirDiffAuthor)[idx])
#endif /* OPENAFS_VOL_STATS */
#define V_volUpCounter(vp)      ((vp)->header->diskstuff.volUpdateCounter)
/* File offset computations.  The offset values in the volume header are
   computed with these macros -- when the file is written only!! */

/* The mount table offset is sizeof(VolumeDiskData); the argument is unused. */
#define VOLUME_MOUNT_TABLE_OFFSET(Volume)   (sizeof (VolumeDiskData))

/*
 * The bitmap offset is sizeof(VolumeDiskData) plus the argument's
 * disk.mountTableSize.
 * NOTE(review): the macro needs the argument to point at something with a
 * `disk` member; no such member is visible on struct Volume in this part of
 * the file -- confirm against the full definition.
 */
#define VOLUME_BITMAP_OFFSET(Volume) \
    (sizeof (VolumeDiskData) + (Volume)->disk.mountTableSize)
804 extern char *VSalvageMessage; /* Canonical message when a volume is forced
806 extern Volume *VGetVolume(Error * ec, Error * client_ec, VolId volumeId);
807 extern Volume *VGetVolumeWithCall(Error * ec, Error * client_ec, VolId volumeId,
808 const struct timespec *ts, struct VCallByVol *cbv);
809 extern Volume *VGetVolume_r(Error * ec, VolId volumeId);
810 extern void VPutVolume(Volume *);
811 extern void VPutVolumeWithCall(Volume *vp, struct VCallByVol *cbv);
812 extern void VPutVolume_r(Volume *);
813 extern void VOffline(Volume * vp, char *message);
814 extern void VOffline_r(Volume * vp, char *message);
815 extern int VConnectFS(void);
816 extern int VConnectFS_r(void);
817 extern void VDisconnectFS(void);
818 extern void VDisconnectFS_r(void);
819 extern int VChildProcReconnectFS(void);
/* Attaching and creating volumes. The mode argument of VAttachVolume takes
 * one of the "modes of attachment" constants (V_READONLY ... V_PEEK) defined
 * later in this header.
 * NOTE(review): the declarations of VCreateVolume, VCreateVolume_r,
 * VFreeBitMapEntry, VAttachVolumeByName and VAttachVolumeByName_r below stop
 * at a trailing comma in this copy of the header; their remaining parameters
 * are not shown here and must be restored from the authoritative source. */
820 extern Volume *VAttachVolume(Error * ec, VolumeId volumeId, int mode);
821 extern Volume *VAttachVolume_r(Error * ec, VolumeId volumeId, int mode);
822 extern Volume *VCreateVolume(Error * ec, char *partname, VolId volumeId,
824 extern Volume *VCreateVolume_r(Error * ec, char *partname, VolId volumeId,
/* Vnode bitmap entries. The flags argument of VAllocBitmapEntry_r takes
 * VOL_ALLOC_BITMAP_* bits and that of VFreeBitMapEntry_r takes
 * VOL_FREE_BITMAP_* bits (both defined later in this header). */
826 extern int VAllocBitmapEntry(Error * ec, Volume * vp,
827 struct vnodeIndex *index);
828 extern int VAllocBitmapEntry_r(Error * ec, Volume * vp,
829 struct vnodeIndex *index, int flags);
830 extern void VFreeBitMapEntry(Error * ec, Volume *vp, struct vnodeIndex *index,
832 extern void VFreeBitMapEntry_r(Error * ec, Volume *vp, struct vnodeIndex *index,
833 unsigned bitNumber, int flags);
/* Volume names. VolumeExternalName returns a char *, whereas
 * VolumeExternalName_r takes a caller-supplied buffer and its length and
 * returns an int. */
834 extern int VolumeNumber(char *name);
835 extern char *VolumeExternalName(VolumeId volumeId);
836 extern int VolumeExternalName_r(VolumeId volumeId, char *name, size_t len);
837 extern Volume *VAttachVolumeByName(Error * ec, char *partition, char *name,
839 extern Volume *VAttachVolumeByName_r(Error * ec, char *partition, char *name,
/* Package-wide shutdown entry points (no arguments, no return value). */
841 extern void VShutdown(void);
842 extern void VSetTranquil(void);
/* Per-volume update / detach / offline operations. The flags argument of
 * VUpdateVolume_r takes VOL_UPDATE_* bits and that of VForceOffline_r takes
 * VOL_FORCEOFF_* bits; both sets are defined later in this header, where
 * their comments explain that they exist to prevent infinite recursion
 * between the two functions. */
843 extern void VUpdateVolume(Error * ec, Volume * vp);
844 extern void VUpdateVolume_r(Error * ec, Volume * vp, int flags);
845 extern void VAddToVolumeUpdateList(Error * ec, Volume * vp);
846 extern void VAddToVolumeUpdateList_r(Error * ec, Volume * vp);
847 extern void VDetachVolume(Error * ec, Volume * vp);
848 extern void VDetachVolume_r(Error * ec, Volume * vp);
849 extern void VForceOffline(Volume * vp);
850 extern void VForceOffline_r(Volume * vp, int flags);
851 extern void VBumpVolumeUsage(Volume * vp);
852 extern void VBumpVolumeUsage_r(Volume * vp);
853 extern void VSetDiskUsage(void);
854 extern void VPrintCacheStats(void);
855 extern void VReleaseVnodeFiles_r(Volume * vp);
856 extern void VCloseVnodeFiles_r(Volume * vp);
/* Partition lookup by name. NOTE(review): abortp presumably selects whether
 * a failed lookup aborts the process -- confirm. */
857 extern struct DiskPartition64 *VGetPartition(char *name, int abortp);
858 extern struct DiskPartition64 *VGetPartition_r(char *name, int abortp);
/* Package initialization; both VOptDefaults and VInitVolumePackage2 take the
 * ProgramType and a VolumePackageOptions structure. */
859 extern void VOptDefaults(ProgramType pt, VolumePackageOptions * opts);
860 extern int VInitVolumePackage2(ProgramType pt, VolumePackageOptions * opts);
861 extern int VInitAttachVolumes(ProgramType pt);
/* Conversions between VolumeHeader_t and VolumeDiskHeader_t. Note that the
 * two functions take their arguments in opposite type order. */
862 extern void DiskToVolumeHeader(VolumeHeader_t * h, VolumeDiskHeader_t * dh);
863 extern void VolumeHeaderToDisk(VolumeDiskHeader_t * dh, VolumeHeader_t * h);
864 extern void AssignVolumeName(VolumeDiskData * vol, char *name, char *ext);
865 extern void VTakeOffline_r(Volume * vp);
866 extern void VTakeOffline(Volume * vp);
867 extern Volume * VLookupVolume_r(Error * ec, VolId volumeId, Volume * hint);
/* NOTE(review): the declaration of VGetVolumePath stops at a trailing comma
 * in this copy of the header; its remaining parameters are not shown. */
868 extern void VGetVolumePath(Error * ec, VolId volumeId, char **partitionp,
870 extern char *vol_DevName(dev_t adev, char *wpath);
871 extern afs_int32 VIsGoingOffline(struct Volume *vp);
/* struct VLockFile interface. VLockFileInit associates a VLockFile with a
 * path; lock and unlock both take an afs_uint32 offset.
 * NOTE(review): presumably each offset is an independent lock within the
 * file, locktype selects the kind of lock, and a non-zero nonblock makes
 * VLockFileLock fail instead of waiting -- confirm against the
 * implementation. */
874 extern void VLockFileInit(struct VLockFile *lf, const char *path);
875 extern void VLockFileReinit(struct VLockFile *lf);
876 extern int VLockFileLock(struct VLockFile *lf, afs_uint32 offset,
877 int locktype, int nonblock);
878 extern void VLockFileUnlock(struct VLockFile *lf, afs_uint32 offset);
/* Everything down to the matching #endif is declared only when
 * AFS_DEMAND_ATTACH_FS is defined. */
880 #ifdef AFS_DEMAND_ATTACH_FS
/* Pre-attach entry points.
 * NOTE(review): the declaration of VPreAttachVolumeById_r stops at a trailing
 * comma in this copy of the header; its remaining parameters are not shown. */
881 extern Volume *VPreAttachVolumeByName(Error * ec, char *partition, char *name);
882 extern Volume *VPreAttachVolumeByName_r(Error * ec, char *partition, char *name);
883 extern Volume *VPreAttachVolumeById_r(Error * ec, char * partition,
885 extern Volume *VPreAttachVolumeByVp_r(Error * ec, struct DiskPartition64 * partp,
886 Volume * vp, VolId volume_id);
887 extern Volume *VGetVolumeByVp_r(Error * ec, Volume * vp);
888 extern int VShutdownByPartition_r(struct DiskPartition64 * dp);
889 extern int VShutdownVolume_r(Volume * vp);
/* NOTE(review): "SALV" presumably refers to the connection to the salvage
 * server -- confirm. All six calls return int. */
890 extern int VConnectSALV(void);
891 extern int VConnectSALV_r(void);
892 extern int VReconnectSALV(void);
893 extern int VReconnectSALV_r(void);
894 extern int VDisconnectSALV(void);
895 extern int VDisconnectSALV_r(void);
896 extern void VPrintExtendedCacheStats(int flags);
897 extern void VPrintExtendedCacheStats_r(int flags);
898 extern void VLRU_SetOptions(int option, afs_uint32 val);
/* Takes the hash size as a log value (parameter name: logsize). */
899 extern int VSetVolHashSize(int logsize);
/* The flags argument of VRequestSalvage_r takes VOL_SALVAGE_* bits, defined
 * later in this header. */
900 extern int VRequestSalvage_r(Error * ec, Volume * vp, int reason, int flags);
901 extern int VUpdateSalvagePriority_r(Volume * vp);
902 extern int VRegisterVolOp_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
903 extern int VDeregisterVolOp_r(Volume * vp);
904 extern void VCancelReservation_r(Volume * vp);
905 extern int VChildProcReconnectFS_r(void);
906 extern void VOfflineForVolOp_r(Error *ec, Volume *vp, char *message);
907 #endif /* AFS_DEMAND_ATTACH_FS */
/* struct VDiskLock interface, declared when either AFS_DEMAND_ATTACH_FS or
 * AFS_DEMAND_ATTACH_UTIL is defined. A VDiskLock is initialized on top of a
 * struct VLockFile.
 * NOTE(review): the declaration of VDiskLockInit stops at a trailing comma in
 * this copy of the header; its remaining parameters are not shown. */
909 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
911 extern void VDiskLockInit(struct VDiskLock *dl, struct VLockFile *lf,
913 extern int VGetDiskLock(struct VDiskLock *dl, int locktype, int nonblock);
914 extern void VReleaseDiskLock(struct VDiskLock *dl, int locktype);
915 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/* Volume-operation predicates; each takes the volume and an
 * FSSYNC_VolOp_info and returns int. Unlike VRegisterVolOp_r these are
 * declared outside the AFS_DEMAND_ATTACH_FS conditional.
 * NOTE(review): presumably a non-zero return means "yes" -- confirm. */
916 extern int VVolOpLeaveOnline_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
917 extern int VVolOpLeaveOnlineNoHeader_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
918 extern int VVolOpSetVBusy_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
920 extern void VPurgeVolume(Error * ec, Volume * vp);
/* Capability queries; no arguments, afs_int32 result.
 * NOTE(review): presumably boolean -- confirm. */
922 extern afs_int32 VCanScheduleSalvage(void);
923 extern afs_int32 VCanUseFSSYNC(void);
924 extern afs_int32 VCanUseSALVSYNC(void);
925 extern afs_int32 VCanUnsafeAttach(void);
/* Volume disk header (VolumeDiskHeader_t) read / write / create / destroy on
 * a given DiskPartition64. Read and destroy identify the header by volume id;
 * write and create take the header structure itself. */
926 extern afs_int32 VReadVolumeDiskHeader(VolumeId volid,
927 struct DiskPartition64 * dp,
928 VolumeDiskHeader_t * hdr);
929 extern afs_int32 VWriteVolumeDiskHeader(VolumeDiskHeader_t * hdr,
930 struct DiskPartition64 * dp);
931 extern afs_int32 VCreateVolumeDiskHeader(VolumeDiskHeader_t * hdr,
932 struct DiskPartition64 * dp);
933 extern afs_int32 VDestroyVolumeDiskHeader(struct DiskPartition64 * dp,
934 VolumeId volid, VolumeId parent);
/* Forward declarations so the callback types below can be declared without
 * the full structure definitions being in scope. */
struct DiskPartition64;
struct VolumeDiskHeader;

/**
 * VWalkVolumeHeaders header callback.
 *
 * @param[in] dp   disk partition
 * @param[in] name full path to the .vol header file
 * @param[in] hdr  the header data that was read from the .vol header
 * @param[in] last 1 if this is the last attempt to read the vol header, 0
 *                 otherwise. DAFS VWalkVolumeHeaders will retry reading the
 *                 header once, if a non-fatal error occurs when reading the
 *                 header, or if this function returns a positive error code.
 *                 So, if there is a problem, this function will be called
 *                 first with last=0, then with last=1, then the error function
 *                 callback will be called. For non-DAFS, this is always 1.
 * @param[in] rock the rock passed to VWalkVolumeHeaders
 *
 * @return operation status
 *  @retval 0        the header was accepted (NOTE(review): inferred from the
 *                   two documented error cases -- confirm)
 *  @retval negative a fatal error that should stop the walk immediately
 *  @retval positive an error with the volume header was encountered; the walk
 *                   should continue, but the error function should be called
 *                   on this volume header
 *
 * @see VWalkVolumeHeaders
 */
typedef int (*VWalkVolFunc)(struct DiskPartition64 *dp, const char *name,
                            struct VolumeDiskHeader *hdr, int last,
                            void *rock);

/**
 * VWalkVolumeHeaders error callback.
 *
 * This is called from VWalkVolumeHeaders when an invalid or otherwise
 * problematic volume header is encountered. It is typically implemented as a
 * wrapper to unlink the .vol file.
 *
 * @param[in] dp   disk partition
 * @param[in] name full path to the .vol header file
 * @param[in] hdr  header read in from the .vol file, or NULL if it could not
 *                 be read
 * @param[in] rock rock passed to VWalkVolumeHeaders
 *
 * @see VWalkVolumeHeaders
 */
typedef void (*VWalkErrFunc)(struct DiskPartition64 *dp, const char *name,
                             struct VolumeDiskHeader *hdr, void *rock);

/**
 * Walk the volume headers of a partition.
 *
 * @param[in] dp       disk partition
 * @param[in] partpath NOTE(review): presumably the path of the partition's
 *                     directory -- confirm against the implementation
 * @param[in] volfunc  header callback, see VWalkVolFunc
 * @param[in] errfunc  error callback, see VWalkErrFunc
 * @param[in] rock     opaque pointer handed unchanged to both callbacks
 *
 * @return int status; the exact values are defined by the implementation and
 *         are not visible from this header
 */
extern int VWalkVolumeHeaders(struct DiskPartition64 *dp, const char *partpath,
                              VWalkVolFunc volfunc, VWalkErrFunc errfunc,
                              void *rock);
984 /* Naive formula relating file size to number of 1K blocks in file */
985 /* Note: we charge 1 block for 0 length files so the user can't store
986 an infinite number of them; for most files, we give him the inode, vnode,
987 and indirect block overhead, for FREE! */
/* The byte count is rounded up to the next multiple of 1024 and the result
 * has type afs_sfsize_t. `bytes` appears twice in the expansion, so do not
 * pass an expression with side effects. */
988 #define nBlocks(bytes) ((afs_sfsize_t)((bytes) == 0? 1: (((afs_sfsize_t)(bytes))+1023)/1024))
990 /* Client process id -- file server sends a Check volumes signal back to the client at this pid.
 * NOTE(review): the value is presumably the path of a file in which that pid
 * is recorded -- confirm against the code that uses it. */
991 #define CLIENTPID "/vice/vol/clientpid"
/* Modes of attachment, for VAttachVolume[ByName] to convey to the file server */
#define V_READONLY 1    /* Absolutely no updates will be done to the volume */
#define V_CLONE    2    /* Cloning the volume: if it is read/write, then directory
                         * version numbers will change. Header will be updated. If
                         * the volume is read-only, the file server may continue to
                         * serve it; it may also continue to serve it in read/write
                         * mode if the writes are deferred */
#define V_VOLUPD   3    /* General update or volume purge is possible.
                         * NOTE(review): the original comment broke off after
                         * "Volume must"; presumably the volume must go offline
                         * for this mode -- confirm */
#define V_DUMP     4    /* A dump of the volume is requested; the volume can be served
                         * read-only during this time */
#define V_SECRETLY 5    /* Secret attach of the volume. This is used to attach a volume
                         * which the file server doesn't know about--and which it shouldn't
                         * know about yet, since the volume has just been created and
                         * is somewhat bogus. Required to make sure that a file server
                         * never knows about more than one copy of the same volume--when
                         * a volume is moved from one partition to another on a single
                         * server (NOTE(review): last word restored by inference; the
                         * original comment was cut off) */
#define V_PEEK     6    /* "Peek" at the volume without telling the fileserver. This is
                         * similar to V_SECRETLY, but read-only. It is used in cases where
                         * not impacting fileserver performance is more important than
                         * getting the most recent data. */
/* Flag bits for the `flags` argument of individual _r functions. Each group
 * below belongs to the one function named in its heading; the numeric values
 * overlap between groups (several are 0x1), so a flag from one group must not
 * be passed to another group's function. */
1018 /* VUpdateVolume_r flags */
1019 #define VOL_UPDATE_WAIT 0x1 /* for demand attach, wait for other exclusive ops to end */
1020 #define VOL_UPDATE_NOFORCEOFF 0x2 /* don't force offline on failure. this is to prevent
1021 * infinite recursion between vupdate and vforceoff */
1023 /* VForceOffline_r flags */
1024 #define VOL_FORCEOFF_NOUPDATE 0x1 /* don't force update on forceoff. this is to prevent
1025 * infinite recursion between vupdate and vforceoff */
1027 /* VSyncVolume_r flags */
1028 #define VOL_SYNC_WAIT 0x1 /* for demand attach, wait for other exclusive ops to end */
1030 /* VAllocBitmapEntry_r flags */
1031 #define VOL_ALLOC_BITMAP_WAIT 0x1 /* for demand attach, wait for other exclusive ops to end */
1033 /* VFreeBitMapEntry_r flags */
1034 #define VOL_FREE_BITMAP_WAIT 0x1 /* for demand attach, wait for other exclusive ops to end */
1036 /* VRequestSalvage_r flags */
1037 #define VOL_SALVAGE_NO_OFFLINE 0x1 /* we do not need to wait to offline the volume; it has
1038 * not been fully attached */
/*
 * V_pref(vp, nearInode): set the caller's nearInode variable, which must be a
 * modifiable lvalue.
 *
 * With NEARINODE_HINT defined, nearInodeHash(V_id(vp), nearInode) is invoked
 * and nearInode is then reduced modulo V_partition(vp)->f_files.
 * NOTE(review): nearInodeHash is presumably a macro that assigns to its
 * second argument -- confirm. Without NEARINODE_HINT, nearInode is simply set
 * to 0 and vp is not evaluated.
 *
 * Both forms expand to a single statement/expression so that the macro is
 * safe as the body of an unbraced if/else. Use it as a statement:
 *     V_pref(vp, nearInode);
 */
#if defined(NEARINODE_HINT)
#define V_pref(vp,nearInode) \
    do { \
        nearInodeHash(V_id(vp), (nearInode)); \
        (nearInode) %= V_partition(vp)->f_files; \
    } while (0)
#else
#define V_pref(vp,nearInode) ((nearInode) = 0)
#endif /* NEARINODE_HINT */
1047 hdr_static_inline(unsigned int)
1048 afs_printable_VolumeId_u(VolumeId d) { return (unsigned int) d; }
1050 hdr_static_inline(unsigned int)
1051 afs_printable_VnodeId_u(VnodeId d) { return (unsigned int) d; }
1053 #endif /* __volume_h */