2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
13 Institution: The Information Technology Center, Carnegie-Mellon University
20 #include <afs/afssyscalls.h>
/*
 * Writeability tests.
 *
 * VolumeWriteable(vp):   vp is a Volume pointer; true when the type read
 *                        through V_type(vp) equals readwriteVolume.
 * VolumeWriteable2(vol): vol is an expression of a structure type that has a
 *                        .type member (not a pointer); true when that member
 *                        equals readwriteVolume.
 *
 * The argument of VolumeWriteable2 is parenthesized so that arguments such as
 * *ptr or a conditional expression bind to the member access as intended.
 */
#define VolumeWriteable(vp) (V_type(vp) == readwriteVolume)
#define VolumeWriteable2(vol) ((vol).type == readwriteVolume)
/* Basic scalar aliases used by the declarations in this header. */
25 typedef bit32 FileOffset; /* Offset in this file */
/* Date is a preprocessor alias for afs_uint32 rather than a typedef. */
26 #define Date afs_uint32
/*
 * Locking primitives for the volume package.
 *
 * When AFS_PTHREAD_ENV is defined, the mutexes and condition variables below
 * are declared and each *_LOCK / *_UNLOCK expansion visible here is an
 * assert() wrapped around pthread_mutex_lock() / pthread_mutex_unlock(), so a
 * non-zero return from the pthread call trips the assertion.
 *
 * NOTE(review): the mutex operation itself is the assert() argument.  If the
 * assert in scope is the standard <assert.h> macro, building with NDEBUG
 * would remove the lock/unlock calls entirely -- confirm which assert is used.
 * NOTE(review): several #define lines are missing from this copy (the names
 * for the vol_glock_mutex expansions and for the vol_fsync_mutex and
 * vol_trans_mutex lock expansions), and the non-pthread branch shows only an
 * empty VATTACH_UNLOCK.
 */
28 #ifdef AFS_PTHREAD_ENV
31 extern pthread_mutex_t vol_glock_mutex;
32 extern pthread_mutex_t vol_attach_mutex;
33 extern pthread_mutex_t vol_fsync_mutex;
34 extern pthread_mutex_t vol_trans_mutex;
35 extern pthread_cond_t vol_put_volume_cond;
36 extern pthread_cond_t vol_sleep_cond;
37 #define VATTACH_LOCK \
38 assert(pthread_mutex_lock(&vol_attach_mutex) == 0)
39 #define VATTACH_UNLOCK \
40 assert(pthread_mutex_unlock(&vol_attach_mutex) == 0)
42 assert(pthread_mutex_lock(&vol_glock_mutex) == 0)
44 assert(pthread_mutex_unlock(&vol_glock_mutex) == 0)
46 assert(pthread_mutex_lock(&vol_fsync_mutex) == 0)
47 #define VFSYNC_UNLOCK \
48 assert(pthread_mutex_unlock(&vol_fsync_mutex) == 0)
50 assert(pthread_mutex_lock(&vol_trans_mutex) == 0)
51 #define VTRANS_UNLOCK \
52 assert(pthread_mutex_unlock(&vol_trans_mutex) == 0)
53 #else /* AFS_PTHREAD_ENV */
55 #define VATTACH_UNLOCK
62 #endif /* AFS_PTHREAD_ENV */
/* Kinds of program that can use the volume package. */
64 typedef enum { fileServer, volumeUtility, salvager } ProgramType;
65 extern ProgramType programType; /* The type of program using the package */
67 /* Some initialization parameters for the volume package */
68 /* Add new initialization parameters here */
/*
 * Both hooks are function pointers declared with an empty parameter list, so
 * the compiler does not check the arguments passed through them.
 */
69 extern int (*V_BreakVolumeCallbacks) ();
70 extern int (*vol_PollProc) ();
/*
 * DOPOLL calls vol_PollProc with no arguments when the pointer is non-null
 * and yields its return value; otherwise it yields 0.
 */
71 #define DOPOLL ((vol_PollProc)? (*vol_PollProc)() : 0)
/*
 * Magic/version pair; the header structures in this file carry one as their
 * leading "stamp" member.
 * NOTE(review): the closing brace of this struct is not visible in this copy.
 */
73 struct versionStamp { /* Version stamp for critical volume files */
74 bit32 magic; /* Magic number */
75 bit32 version; /* Version number of this file, or software
76 * that created this file */
79 /* Magic numbers and version stamps for each type of file */
/*
 * NOTE(review): only the first two magic values carry a (bit32) cast; the
 * remaining ones are plain hexadecimal constants.
 */
80 #define VOLUMEHEADERMAGIC ((bit32)0x88a1bb3c)
81 #define VOLUMEINFOMAGIC ((bit32)0x78a1b2c5)
82 #define SMALLINDEXMAGIC 0x99776655
83 #define LARGEINDEXMAGIC 0x88664433
84 #define MOUNTMAGIC 0x9a8b7c6d
85 #define ACLMAGIC 0x88877712
86 #define LINKTABLEMAGIC 0x99877712
/* Every version constant visible here is currently 1. */
88 #define VOLUMEHEADERVERSION 1
89 #define VOLUMEINFOVERSION 1
90 #define SMALLINDEXVERSION 1
91 #define LARGEINDEXVERSION 1
92 #define MOUNTVERSION 1
94 #define LINKTABLEVERSION 1
/*
 * Compile-time switch and constants for per-volume statistics.
 * OPENAFS_VOL_STATS is defined as 1 here, so the constants guarded by
 * "#if OPENAFS_VOL_STATS" below are always available.
 */
97 * Define whether we are keeping detailed statistics on volume dealings.
99 #define OPENAFS_VOL_STATS 1
101 #if OPENAFS_VOL_STATS
103 * Define various indices and counts used in keeping volume-level statistics.
/* Number of read/write counters, followed by the index of each one. */
105 #define VOL_STATS_NUM_RWINFO_FIELDS 4
107 #define VOL_STATS_SAME_NET 0 /*Within same site (total) */
108 #define VOL_STATS_SAME_NET_AUTH 1 /*Within same site (authenticated);
109 * (must be 1 more than above) */
110 #define VOL_STATS_DIFF_NET 2 /*From external site (total) */
111 #define VOL_STATS_DIFF_NET_AUTH 3 /*From external site (authenticated)
112 * (must be 1 more than above) */
/*
 * Six time ranges are delimited by the five upper bounds (in seconds) below;
 * the last range has no upper bound.
 */
114 #define VOL_STATS_NUM_TIME_RANGES 6
116 #define VOL_STATS_TIME_CAP_0 60 /*60 seconds */
117 #define VOL_STATS_TIME_CAP_1 600 /*10 minutes, in seconds */
118 #define VOL_STATS_TIME_CAP_2 3600 /*1 hour, in seconds */
119 #define VOL_STATS_TIME_CAP_3 86400 /*1 day, in seconds */
120 #define VOL_STATS_TIME_CAP_4 604800 /*1 week, in seconds */
/* Number of per-range counters, followed by the index of each range. */
122 #define VOL_STATS_NUM_TIME_FIELDS 6
124 #define VOL_STATS_TIME_IDX_0 0 /*0 secs to 60 secs */
125 #define VOL_STATS_TIME_IDX_1 1 /*1 min to 10 mins */
126 #define VOL_STATS_TIME_IDX_2 2 /*10 mins to 60 mins */
127 #define VOL_STATS_TIME_IDX_3 3 /*1 hr to 24 hrs */
128 #define VOL_STATS_TIME_IDX_4 4 /*1 day to 7 days */
129 #define VOL_STATS_TIME_IDX_5 5 /*Greater than 1 week */
130 #endif /* OPENAFS_VOL_STATS */
132 /* Volume header. This is the contents of the named file representing
133 * the volume. Read-only by the file server! */
/*
 * Volume header: version stamp, volume ids and Inode references to files
 * that make up the volume.
 * NOTE(review): some members and the closing "} VolumeHeader_t;" line are not
 * visible in this copy.
 */
135 typedef struct VolumeHeader {
136 struct versionStamp stamp; /* Must be first field */
137 VolumeId id; /* Volume number */
138 VolumeId parent; /* Read-write volume number (or this volume
139 * number if this is a read-write volume) */
141 Inode smallVnodeIndex;
142 Inode largeVnodeIndex;
144 Inode volumeMountTable;
/*
 * Disk form of the volume header; DiskToVolumeHeader() and
 * VolumeHeaderToDisk() take this type together with VolumeHeader_t.
 * Each file reference is stored as a pair of afs_int32 members named
 * <field>_lo and <field>_hi (presumably the low and high halves of the
 * corresponding inode number -- confirm in the conversion routines).
 * NOTE(review): the array mentioned by the last comment in the struct is not
 * visible in this copy.
 */
149 typedef struct VolumeDiskHeader {
150 struct versionStamp stamp; /* Must be first field */
151 VolumeId id; /* Volume number */
152 VolumeId parent; /* Read-write volume number (or this volume
153 * number if this is a read-write volume) */
154 afs_int32 volumeInfo_lo;
155 afs_int32 smallVnodeIndex_lo;
156 afs_int32 largeVnodeIndex_lo;
157 afs_int32 volumeAcl_lo;
158 afs_int32 volumeMountTable_lo;
159 afs_int32 volumeInfo_hi;
160 afs_int32 smallVnodeIndex_hi;
161 afs_int32 largeVnodeIndex_hi;
162 afs_int32 volumeAcl_hi;
163 afs_int32 volumeMountTable_hi;
164 afs_int32 linkTable_lo;
165 afs_int32 linkTable_hi;
166 /* If you add fields, add them before here and reduce the size of array */
168 } VolumeDiskHeader_t;
170 /* A vnode index file header */
/* NOTE(review): only the stamp member is visible; the closing brace is missing from this copy. */
171 struct IndexFileHeader {
172 struct versionStamp stamp;
176 /******************************************************************************/
177 /* Volume Data which is stored on disk and can also be maintained in memory. */
178 /******************************************************************************/
/*
 * General per-volume information.  struct volHeader keeps one of these in
 * memory as its "diskstuff" member, which is what the V_* accessor macros in
 * this header read.
 * NOTE(review): several members those macros name (for example type and
 * copyDate), the VMSGSIZE definition and the closing brace are not visible in
 * this copy, and a few member comments are cut off mid-sentence.
 */
179 typedef struct VolumeDiskData {
180 struct versionStamp stamp; /* Must be first field */
181 VolumeId id; /* Volume id--unique over all systems */
182 #define VNAMESIZE 32 /* including 0 byte */
183 char name[VNAMESIZE]; /* Unofficial name for the volume */
184 byte inUse; /* Volume is being used (perhaps it is online),
185 * or the system crashed while it was used */
186 byte inService; /* Volume in service, not necessarily on line
187 * This bit is set by an operator/system
188 * programmer. Manually taking a volume offline
189 * always clears the inService bit. Taking
190 * it out of service also takes it offline */
191 byte blessed; /* Volume is administratively blessed with
192 * the ability to go on line. Set by a system
193 * administrator. Clearing this bit will
194 * take the volume offline */
195 byte needsSalvaged; /* Volume needs salvaged--an unrecoverable
196 * error occurred to the volume. Note: a volume
197 * may still require salvage even if this
198 * flag isn't set--e.g. if a system crash
199 * occurred while the volume was on line. */
200 bit32 uniquifier; /* Next vnode uniquifier for this volume */
202 VolId parentId; /* Id of parent, if type==readonly */
203 VolId cloneId; /* Latest read-only clone, if type==readwrite,
204 * 0 if the volume has never been cloned. Note: the
205 * indicated volume does not necessarily exist (it
206 * may have been deleted since cloning). */
207 VolId backupId; /* Latest backup copy of this read write volume */
208 VolId restoredFromId; /* The id in the dump this volume was restored from--used simply
209 * to make sure that an incremental dump is not restored on top
210 * of something inappropriate: Note: this field itself is NEVER
212 byte needsCallback; /* Set by the salvager if anything was changed
213 * about the volume. Note: this is not set by
214 * clone/makebackups when setting the copy-on-write
215 * flag in directories; this flag is not seen by
217 #define DESTROY_ME 0xD3
218 byte destroyMe; /* If this is set to DESTROY_ME, then the salvager should destroy
219 * this volume; it is bogus (left over from an aborted volume move,
220 * for example). Note: if this flag is on, then inService should
221 * be OFF--only the salvager checks this flag */
222 #ifdef ALPHA_DUX40_ENV
223 #define DONT_SALVAGE 0xE6
224 #else /* ALPHA_DUX40_ENV */
225 #define DONT_SALVAGE 0xE5
226 #endif /* ALPHA_DUX40_ENV */
227 byte dontSalvage; /* If this is on, then don't bother salvaging this volume */
233 /* Administrative stuff */
234 int maxquota; /* Quota maximum, 1K blocks */
235 int minquota; /* Quota minimum, 1K blocks */
236 int maxfiles; /* Maximum number of files (i.e. inodes) */
237 bit32 accountNumber; /* Uninterpreted account number */
238 bit32 owner; /* The person administratively responsible
240 int reserved2[8]; /* Other administrative constraints */
242 /* Resource usage & statistics */
243 int filecount; /* Actual number of files */
244 int diskused; /* Actual disk space used, 1K blocks */
245 int dayUse; /* Metric for today's usage of this volume so far */
246 int weekUse[7]; /* Usage of the volume for the last week.
247 * weekUse[0] is for most recent complete 24 hour period
248 * of measurement; weekUse[6] is 7 days ago */
249 Date dayUseDate; /* Date the dayUse statistics refer to; the week use stats
250 * are the preceding 7 days */
251 int reserved3[11]; /* Other stats here */
253 /* Server supplied dates */
254 Date creationDate; /* Creation date for a read/write
255 * volume; cloning date for original copy of
256 * a readonly volume (replicated volumes have
257 * the same creation date) */
258 Date accessDate; /* Last access time by a user, large granularity */
259 Date updateDate; /* Last modification by user */
260 Date expirationDate; /* 0 if it never expires */
261 Date backupDate; /* last time a backup clone was taken */
263 /* Time that this copy of this volume was made. NEVER backed up. This field is only
264 * set when the copy is created */
267 #if OPENAFS_VOL_STATS
268 bit32 stat_initialized; /*Are the stat fields below set up? */
272 #endif /* OPENAFS_VOL_STATS */
/* NOTE(review): VMSGSIZE is not defined in the visible part of this header. */
276 char offlineMessage[VMSGSIZE]; /* Why the volume is offline */
277 #if OPENAFS_VOL_STATS
278 #define VOL_STATS_BYTES 128
280 * Keep per-volume aggregate statistics on type and distance of access,
281 * along with authorship info.
283 bit32 stat_reads[VOL_STATS_NUM_RWINFO_FIELDS];
284 bit32 stat_writes[VOL_STATS_NUM_RWINFO_FIELDS];
285 bit32 stat_fileSameAuthor[VOL_STATS_NUM_TIME_FIELDS];
286 bit32 stat_fileDiffAuthor[VOL_STATS_NUM_TIME_FIELDS];
287 bit32 stat_dirSameAuthor[VOL_STATS_NUM_TIME_FIELDS];
288 bit32 stat_dirDiffAuthor[VOL_STATS_NUM_TIME_FIELDS];
290 char motd[VMSGSIZE]; /* Volume "message of the day" */
291 #endif /* OPENAFS_VOL_STATS */
296 /**************************************/
297 /* Memory resident volume information */
298 /**************************************/
/*
 * In-memory descriptor of a volume: hash chaining, a pointer to the cached
 * header, per-class vnode index state, and flags describing the volume's
 * shutdown/offline status.
 * NOTE(review): some lines of this definition are missing from this copy (the
 * type in front of "*partition", the opening line of the struct that ends in
 * "} vnodeIndex[nVNODECLASSES];", and the closing brace of Volume).
 */
299 typedef struct Volume {
300 struct Volume *hashNext; /* Next in hash resolution table */
301 VolumeId hashid; /* Volume number -- for hash table lookup */
302 struct volHeader *header; /* Cached disk data */
303 Device device; /* Unix device for the volume */
305 *partition; /* Information about the Unix partition */
307 IHandle_t *handle; /* Unix inode holding this index */
308 byte *bitmap; /* Index bitmap */
309 afs_uint32 bitmapSize; /* length of bitmap, in bytes */
310 afs_uint32 bitmapOffset; /* Which byte address of the first long to
311 * start search from in bitmap */
312 } vnodeIndex[nVNODECLASSES];
313 IHandle_t *linkHandle;
314 Unique nextVnodeUnique; /* Derived originally from volume uniquifier.
315 * This is the actual next version number to
316 * assign; the uniquifier is bumped by 200
317 * and written to disk every 200 file creates.
318 * If the volume is shutdown gracefully, the
319 * uniquifier should be rewritten with the
320 * value nextVnodeVersion (presumably this
 * member, nextVnodeUnique -- confirm) */
321 IHandle_t *diskDataHandle; /* Unix inode holding general volume info */
322 bit16 vnodeHashOffset; /* Computed by HashOffset function in vnode.h.
323 * Assigned to the volume when initialized.
324 * Added to vnode number for hash table index */
325 byte shuttingDown; /* This volume is going to be detached */
326 byte goingOffline; /* This volume is going offline */
327 bit32 cacheCheck; /* Online sequence number to be used to invalidate vnode cache entries
328 * that stayed around while a volume was offline */
329 short nUsers; /* Number of users of this volume header */
330 byte needsPutBack; /* For a volume utility, this flag is set if we need
331 * to give the volume back when we detach it. The server has
332 * certain modes where it doesn't detach the volume, and
333 * if we give it back spuriously, the server aborts. This field
334 * is meaningless on the file server */
335 byte specialStatus; /* An error code to return on VGetVolume: the
336 * volume is unavailable for the reason quoted,
337 * currently VBUSY or VMOVED */
338 afs_uint32 updateTime; /* Time that this volume was put on the updated
339 * volume list--the list of volumes that will be
340 * salvaged should the file server crash */
/*
 * NOTE(review): the members below appear to belong to struct volHeader (the
 * type of Volume.header); its opening and closing lines are not visible in
 * this copy.
 */
344 struct volHeader *prev, *next; /* LRU pointers */
345 VolumeDiskData diskstuff; /* General volume info read from disk */
346 Volume *back; /* back pointer to current volume structure */
349 /* These macros are used to export fields within the volume header. This was added
350 to facilitate changing the actual representation */
/* Accessors for members stored directly in the Volume structure (vp is a Volume pointer). */
352 #define V_device(vp) ((vp)->device)
353 #define V_partition(vp) ((vp)->partition)
354 #define V_diskDataHandle(vp) ((vp)->diskDataHandle)
355 #define V_vnodeIndex(vp) ((vp)->vnodeIndex)
356 #define V_nextVnodeUnique(vp) ((vp)->nextVnodeUnique)
357 #define V_linkHandle(vp) ((vp)->linkHandle)
/*
 * Accessors for members of the VolumeDiskData reached through
 * vp->header->diskstuff; each expands to an lvalue, so vp->header must be a
 * valid pointer whenever one of them is used.
 */
359 /* N.B. V_id must be this, rather than vp->id, or some programs will break, probably */
360 #define V_stamp(vp) ((vp)->header->diskstuff.stamp)
361 #define V_id(vp) ((vp)->header->diskstuff.id)
362 #define V_name(vp) ((vp)->header->diskstuff.name)
363 #define V_inUse(vp) ((vp)->header->diskstuff.inUse)
364 #define V_inService(vp) ((vp)->header->diskstuff.inService)
365 #define V_blessed(vp) ((vp)->header->diskstuff.blessed)
366 #define V_needsSalvaged(vp) ((vp)->header->diskstuff.needsSalvaged)
367 #define V_uniquifier(vp) ((vp)->header->diskstuff.uniquifier)
368 #define V_type(vp) ((vp)->header->diskstuff.type)
369 #define V_parentId(vp) ((vp)->header->diskstuff.parentId)
370 #define V_cloneId(vp) ((vp)->header->diskstuff.cloneId)
371 #define V_backupId(vp) ((vp)->header->diskstuff.backupId)
372 #define V_restoredFromId(vp) ((vp)->header->diskstuff.restoredFromId)
373 #define V_needsCallback(vp) ((vp)->header->diskstuff.needsCallback)
374 #define V_destroyMe(vp) ((vp)->header->diskstuff.destroyMe)
375 #define V_dontSalvage(vp) ((vp)->header->diskstuff.dontSalvage)
376 #define V_maxquota(vp) ((vp)->header->diskstuff.maxquota)
377 #define V_minquota(vp) ((vp)->header->diskstuff.minquota)
378 #define V_maxfiles(vp) ((vp)->header->diskstuff.maxfiles)
379 #define V_accountNumber(vp) ((vp)->header->diskstuff.accountNumber)
380 #define V_owner(vp) ((vp)->header->diskstuff.owner)
381 #define V_filecount(vp) ((vp)->header->diskstuff.filecount)
382 #define V_diskused(vp) ((vp)->header->diskstuff.diskused)
383 #define V_dayUse(vp) ((vp)->header->diskstuff.dayUse)
384 #define V_weekUse(vp) ((vp)->header->diskstuff.weekUse)
385 #define V_dayUseDate(vp) ((vp)->header->diskstuff.dayUseDate)
386 #define V_creationDate(vp) ((vp)->header->diskstuff.creationDate)
387 #define V_accessDate(vp) ((vp)->header->diskstuff.accessDate)
388 #define V_updateDate(vp) ((vp)->header->diskstuff.updateDate)
389 #define V_expirationDate(vp) ((vp)->header->diskstuff.expirationDate)
390 #define V_backupDate(vp) ((vp)->header->diskstuff.backupDate)
391 #define V_copyDate(vp) ((vp)->header->diskstuff.copyDate)
392 #define V_offlineMessage(vp) ((vp)->header->diskstuff.offlineMessage)
393 #define V_disk(vp) ((vp)->header->diskstuff)
394 #define V_motd(vp) ((vp)->header->diskstuff.motd)
/*
 * Accessors for the statistics members.  The indexed forms select one
 * counter; V_stat_area yields the stat_reads array itself (presumably used as
 * the start of the whole statistics area -- confirm against callers).
 */
395 #if OPENAFS_VOL_STATS
396 #define V_stat_initialized(vp) ((vp)->header->diskstuff.stat_initialized)
397 #define V_stat_area(vp) (((vp)->header->diskstuff.stat_reads))
398 #define V_stat_reads(vp, idx) (((vp)->header->diskstuff.stat_reads)[idx])
399 #define V_stat_writes(vp, idx) (((vp)->header->diskstuff.stat_writes)[idx])
400 #define V_stat_fileSameAuthor(vp, idx) (((vp)->header->diskstuff.stat_fileSameAuthor)[idx])
401 #define V_stat_fileDiffAuthor(vp, idx) (((vp)->header->diskstuff.stat_fileDiffAuthor)[idx])
402 #define V_stat_dirSameAuthor(vp, idx) (((vp)->header->diskstuff.stat_dirSameAuthor)[idx])
403 #define V_stat_dirDiffAuthor(vp, idx) (((vp)->header->diskstuff.stat_dirDiffAuthor)[idx])
404 #endif /* OPENAFS_VOL_STATS */
406 /* File offset computations. The offset values in the volume header are
407 computed with these macros -- when the file is written only!! */
/*
 * VOLUME_MOUNT_TABLE_OFFSET ignores its argument and is always
 * sizeof(VolumeDiskData).  VOLUME_BITMAP_OFFSET adds the argument's
 * disk.mountTableSize to that size.
 * NOTE(review): no "disk" member is visible in struct Volume in this copy, so
 * it is unclear which type VOLUME_BITMAP_OFFSET is meant for -- confirm.
 */
408 #define VOLUME_MOUNT_TABLE_OFFSET(Volume) (sizeof (VolumeDiskData))
409 #define VOLUME_BITMAP_OFFSET(Volume) \
410 (sizeof (VolumeDiskData) + (Volume)->disk.mountTableSize)
413 extern char *VSalvageMessage; /* Canonical message when a volume is forced
 * ... NOTE(review): the end of this comment is missing from
 * this copy (presumably "forced offline" -- confirm) */
/*
 * Entry points of the volume package.  Most routines are declared twice, as
 * NAME and NAME_r, with identical parameter lists.
 * NOTE(review): presumably the _r form expects the caller to already hold the
 * volume package lock while the plain form acquires it itself -- confirm in
 * the implementation before relying on this.
 * NOTE(review): the second line of several prototypes (VCreateVolume*,
 * VFreeBitMapEntry*, VAttachVolumeByName*) is missing from this copy.
 */
415 extern Volume *VGetVolume(Error * ec, VolId volumeId);
416 extern Volume *VGetVolume_r(Error * ec, VolId volumeId);
417 extern void VPutVolume(Volume *);
418 extern void VPutVolume_r(Volume *);
419 extern void VOffline(Volume * vp, char *message);
420 extern void VOffline_r(Volume * vp, char *message);
421 extern int VConnectFS(void);
422 extern int VConnectFS_r(void);
423 extern Volume *VAttachVolume(Error * ec, VolumeId volumeId, int mode);
424 extern Volume *VAttachVolume_r(Error * ec, VolumeId volumeId, int mode);
425 extern Volume *VCreateVolume(Error * ec, char *partname, VolId volumeId,
427 extern Volume *VCreateVolume_r(Error * ec, char *partname, VolId volumeId,
429 extern VnodeId VAllocBitmapEntry(Error * ec, Volume * vp,
430 struct vnodeIndex *index);
431 extern VnodeId VAllocBitmapEntry_r(Error * ec, Volume * vp,
432 struct vnodeIndex *index);
433 extern void VFreeBitMapEntry(Error * ec, register struct vnodeIndex *index,
435 extern void VFreeBitMapEntry_r(Error * ec, register struct vnodeIndex *index,
437 extern int VolumeNumber(char *name);
438 extern char *VolumeExternalName(VolumeId volumeId);
439 extern Volume *VAttachVolumeByName(Error * ec, char *partition, char *name,
441 extern Volume *VAttachVolumeByName_r(Error * ec, char *partition, char *name,
443 extern void VShutdown(void);
444 extern void VUpdateVolume(Error * ec, Volume * vp);
445 extern void VUpdateVolume_r(Error * ec, Volume * vp);
446 extern void VAddToVolumeUpdateList(Error * ec, Volume * vp);
447 extern void VAddToVolumeUpdateList_r(Error * ec, Volume * vp);
448 extern void VDetachVolume(Error * ec, Volume * vp);
449 extern void VDetachVolume_r(Error * ec, Volume * vp);
450 extern void VForceOffline(Volume * vp);
451 extern void VForceOffline_r(Volume * vp);
452 extern void VBumpVolumeUsage(register Volume * vp);
453 extern void VBumpVolumeUsage_r(register Volume * vp);
454 extern void VSetDiskUsage(void);
455 extern void VPrintCacheStats(void);
456 extern void VReleaseVnodeFiles_r(Volume * vp);
457 extern void VCloseVnodeFiles_r(Volume * vp);
458 extern struct DiskPartition *VGetPartition(char *name, int abortp);
459 extern struct DiskPartition *VGetPartition_r(char *name, int abortp);
460 extern int VInitVolumePackage(ProgramType pt, int nLargeVnodes,
461 int nSmallVnodes, int connect, int volcache);
462 extern void DiskToVolumeHeader(VolumeHeader_t * h, VolumeDiskHeader_t * dh);
463 extern void VolumeHeaderToDisk(VolumeDiskHeader_t * dh, VolumeHeader_t * h);
464 extern void VTakeOffline_r(register Volume * vp);
465 extern void VTakeOffline(register Volume * vp);
468 /* Naive formula relating file size in bytes to number of 1K blocks in file */
469 /* Note: we charge 1 block for 0 length files so the user can't store
470 an infinite number of them; for most files, we give him the inode, vnode,
471 and indirect block overhead, for FREE! */
/*
 * nBlocks(bytes) yields 1 when bytes is 0, otherwise bytes rounded up to a
 * whole number of 1024-byte blocks, computed in afs_sfsize_t.  The argument
 * is evaluated twice, so it must not have side effects.
 */
472 #define nBlocks(bytes) ((afs_sfsize_t)((bytes) == 0? 1: (((afs_sfsize_t)(bytes))+1023)/1024))
474 /* Client process id -- file server sends a Check volumes signal back to the client at this pid */
/* NOTE(review): hard-coded absolute path; presumably the file in which that pid is recorded -- confirm. */
475 #define CLIENTPID "/vice/vol/clientpid"
477 /* Modes of attachment, for VAttachVolume[ByName] to convey to the file server */
478 #define V_READONLY 1 /* Absolutely no updates will be done to the volume */
479 #define V_CLONE 2 /* Cloning the volume: if it is read/write, then directory
480 * version numbers will change. Header will be updated. If
481 * the volume is read-only, the file server may continue to
482 * serve it; it may also continue to serve it in read/write
483 * mode if the writes are deferred */
484 #define V_VOLUPD 3 /* General update or volume purge is possible. Volume must
 * ... NOTE(review): the rest of this sentence is missing from
 * this copy; restore it from the upstream header. */
486 #define V_DUMP 4 /* A dump of the volume is requested; the volume can be served
487 * read-only during this time */
488 #define V_SECRETLY 5 /* Secret attach of the volume. This is used to attach a volume
489 * which the file server doesn't know about--and which it shouldn't
490 * know about yet, since the volume has just been created and
491 * is somewhat bogus. Required to make sure that a file server
492 * never knows about more than one copy of the same volume--when
493 * a volume is moved from one partition to another on a single
 * ... NOTE(review): the end of this comment is missing from
 * this copy (presumably "single server" -- confirm). */
495 #define V_PEEK 6 /* "Peek" at the volume without telling the fileserver. This is
496 * similar to V_SECRETLY, but read-only. It is used in cases where
497 * not impacting fileserver performance is more important than
498 * getting the most recent data. */
501 #if defined(NEARINODE_HINT)
/*
 * V_pref(vp, nearInode): compute an inode placement hint for volume vp.
 * nearInodeHash() is invoked with the volume id and nearInode (presumably it
 * stores a hash of the id in nearInode -- confirm against its definition);
 * nearInode is then reduced modulo the partition's f_files count.
 * nearInode must be a modifiable lvalue and is evaluated more than once.
 *
 * The two steps are wrapped in do { } while (0) so the macro acts as a single
 * statement; without it, an unbraced "if (cond) V_pref(vp, n);" would run the
 * modulo step unconditionally.
 */
#define V_pref(vp,nearInode) \
    do { \
        nearInodeHash(V_id(vp), (nearInode)); \
        (nearInode) %= V_partition(vp)->f_files; \
    } while (0)
/*
 * V_pref(vp, nearInode): variant that supplies no placement hint; vp is
 * ignored and 0 is stored in nearInode (presumably the branch used when
 * NEARINODE_HINT is not defined -- the #else line is not visible in this
 * copy).  The expansion and its argument are parenthesized so the macro
 * behaves as one expression wherever it is used.
 */
#define V_pref(vp,nearInode) ((nearInode) = 0)
505 #endif /* NEARINODE_HINT */
507 #endif /* __volume_h */