2 * Copyright 2009-2010, Sine Nomine Associates and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * volume group membership cache
13 * asynchronous partition scanner
16 #include <afsconfig.h>
17 #include <afs/param.h>
19 #ifdef AFS_DEMAND_ATTACH_FS
27 #include <afs/assert.h>
30 #include <sys/param.h>
32 #if defined(AFS_SUN5_ENV) || defined(AFS_HPUX_ENV)
35 #include <afs/afsutil.h>
38 #include <afs/afsint.h>
42 #include "viceinode.h"
44 #include "partition.h"
45 #include <afs/errors.h>
47 #define __VOL_VG_CACHE_IMPL 1
50 #include "vg_cache_impl.h"
53 #define afs_open open64
54 #else /* !O_LARGEFILE */
56 #endif /* !O_LARGEFILE */
58 static int _VVGC_scan_table_init(VVGCache_scan_table_t * tbl);
59 static int _VVGC_scan_table_add(VVGCache_scan_table_t * tbl,
60 struct DiskPartition64 * dp,
63 static int _VVGC_scan_table_flush(VVGCache_scan_table_t * tbl,
64 struct DiskPartition64 * dp);
65 static void * _VVGC_scanner_thread(void *);
66 static int _VVGC_scan_partition(struct DiskPartition64 * part);
67 static VVGCache_dlist_entry_t * _VVGC_dlist_lookup_r(struct DiskPartition64 *dp,
70 static void _VVGC_flush_dlist(struct DiskPartition64 *dp);
73 * init a thread-local scan table.
75 * @param[in] tbl scan table
77 * @return operation status
83 _VVGC_scan_table_init(VVGCache_scan_table_t * tbl)
85 memset(tbl, 0, sizeof(*tbl));
91 * add an entry to the thread-local scan table.
93 * @param[in] tbl scan table
94 * @param[in] dp disk partition object
95 * @param[in] volid volume id
96 * @param[in] parent parent volume id
98 * @pre VOL_LOCK is NOT held
100 * @note if the table is full, this routine will acquire
101 * VOL_LOCK and flush the table to the global one.
103 * @return operation status
105 * @retval nonzero a VVGCache_entry_add_r operation failed during a
106 * flush of the thread-local table
111 _VVGC_scan_table_add(VVGCache_scan_table_t * tbl,
112 struct DiskPartition64 * dp,
118 if (tbl->idx == VVGC_SCAN_TBL_LEN) {
119 code = _VVGC_scan_table_flush(tbl, dp);
122 tbl->entries[tbl->idx].volid = volid;
123 tbl->entries[tbl->idx].parent = parent;
130 * flush thread-local scan table to the global VG cache.
132 * @param[in] tbl scan table
133 * @param[in] dp disk partition object
135 * @pre VOL_LOCK is NOT held
137 * @return operation status
139 * @retval nonzero a VVGCache_entry_add_r operation failed during a
140 * flush of the thread-local table
145 _VVGC_scan_table_flush(VVGCache_scan_table_t * tbl,
146 struct DiskPartition64 * dp)
148 int code = 0, res, i;
150 unsigned long newvols, newvgs;
152 newvols = tbl->newvols;
153 newvgs = tbl->newvgs;
157 for (i = 0; i < tbl->idx; i++) {
159 * We need to check the 'to-delete' list and prevent adding any entries
160 * that are on it. The volser could potentially create a volume in one
161 * VG, then delete it and put it on another VG. If we are doing a scan
162 * when that happens, tbl->entries could have the entries for trying to
163 * put the vol on both VGs, though at least one of them will also be on
164 * the dlist. If we put everything in tbl->entries on the VGC then try
165 * to delete afterwards, putting one entry on the VGC will cause an error,
166 * and we'll fail to add it. So instead, avoid adding any new VGC
167 * entries if it is on the dlist.
169 if (_VVGC_dlist_lookup_r(dp, tbl->entries[i].parent,
170 tbl->entries[i].volid)) {
173 res = VVGCache_entry_add_r(dp,
174 tbl->entries[i].parent,
175 tbl->entries[i].volid,
185 /* flush the to-delete list while we're here. We don't need to preserve
186 * the list across the entire scan, and flushing it each time we flush
187 * a scan table will keep the size of the dlist down */
188 _VVGC_flush_dlist(dp);
192 ViceLog(125, ("VVGC_scan_table_flush: flushed %d entries from "
193 "scan table to global VG cache\n", tbl->idx));
194 ViceLog(125, ("VVGC_scan_table_flush: %s total: %lu vols, %lu groups\n",
195 VPartitionPath(dp), newvols, newvgs));
197 res = _VVGC_scan_table_init(tbl);
202 tbl->newvols = newvols;
203 tbl->newvgs = newvgs;
209 * read a volume header from disk into a VolumeHeader structure.
211 * @param[in] path absolute path to .vol volume header
212 * @param[out] hdr volume header object
214 * @return operation status
216 * @retval ENOENT volume header does not exist
217 * @retval EINVAL volume header is invalid
222 _VVGC_read_header(const char *path, struct VolumeHeader *hdr)
226 struct VolumeDiskHeader diskHeader;
228 fd = afs_open(path, O_RDONLY);
230 ViceLog(0, ("_VVGC_read_header: could not open %s; error = %d\n",
235 code = read(fd, &diskHeader, sizeof(diskHeader));
237 if (code != sizeof(diskHeader)) {
238 ViceLog(0, ("_VVGC_read_header: could not read disk header from %s; error = %d\n",
243 if (diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
244 ViceLog(0, ("_VVGC_read_header: disk header %s has magic %lu, should "
246 afs_printable_uint32_lu(diskHeader.stamp.magic),
247 afs_printable_uint32_lu(VOLUMEHEADERMAGIC)));
251 DiskToVolumeHeader(hdr, &diskHeader);
256 * determines what to do with a volume header during a VGC scan.
258 * @param[in] dp the disk partition object
259 * @param[in] node_path the absolute path to the header to handle
260 * @param[out] hdr the header read in from disk
261 * @param[out] skip 1 if we should skip the header (pretend it doesn't
262 * exist), 0 otherwise
264 * @return operation status
266 * @retval -1 internal error beyond just failing to read the header file
269 _VVGC_handle_header(struct DiskPartition64 *dp, const char *node_path,
270 struct VolumeHeader *hdr, int *skip)
276 code = _VVGC_read_header(node_path, hdr);
278 /* retry while holding a partition write lock, to ensure we're not
279 * racing a writer/creator of the header */
281 if (code == ENOENT) {
282 /* Ignore ENOENT; it's as if we never got it from readdir in the
283 * first place. Other error codes mean the header exists, but
284 * there's something wrong with it. */
288 code = VPartHeaderLock(dp, WRITE_LOCK);
290 ViceLog(0, ("_VVGC_handle_header: error acquiring partition "
291 "write lock while trying to open %s\n",
295 code = _VVGC_read_header(node_path, hdr);
296 VPartHeaderUnlock(dp, WRITE_LOCK);
300 if (code != ENOENT) {
301 ViceLog(0, ("_VVGC_scan_partition: %s does not appear to be a "
302 "legitimate volume header file; deleted\n",
305 if (unlink(node_path)) {
306 ViceLog(0, ("Unable to unlink %s (errno = %d)\n",
313 /* header is fine; do not skip it, and do not error out */
319 * scan a disk partition for .vol files
321 * @param[in] part disk partition object
323 * @pre VOL_LOCK is NOT held
325 * @return operation status
327 * @retval -1 invalid disk partition object
328 * @retval -2 failed to flush stale entries for this partition
333 _VVGC_scan_partition(struct DiskPartition64 * part)
337 struct VolumeHeader hdr;
339 VVGCache_scan_table_t tbl;
340 char *part_path = NULL, *p;
341 char node_path[MAXPATHLEN];
343 code = _VVGC_scan_table_init(&tbl);
345 ViceLog(0, ("VVGC_scan_partition: could not init scan table; error = %d\n",
349 part_path = VPartitionPath(part);
350 if (part_path == NULL) {
351 ViceLog(0, ("VVGC_scan_partition: invalid partition object given; aborting scan\n"));
357 res = _VVGC_flush_part_r(part);
359 ViceLog(0, ("VVGC_scan_partition: error flushing partition %s; error = %d\n",
360 VPartitionPath(part), res));
368 dirp = opendir(part_path);
370 ViceLog(0, ("VVGC_scan_partition: could not open %s, aborting scan; error = %d\n",
376 ViceLog(5, ("VVGC_scan_partition: scanning partition %s for VG cache\n",
379 while ((dp = readdir(dirp))) {
380 p = strrchr(dp->d_name, '.');
381 if (p == NULL || strcmp(p, VHDREXT) != 0) {
387 VPartitionPath(part),
390 res = _VVGC_handle_header(part, node_path, &hdr, &skip);
392 /* internal error; error out */
400 res = _VVGC_scan_table_add(&tbl,
405 ViceLog(0, ("VVGC_scan_partition: error %d adding volume %s to scan table\n",
411 _VVGC_scan_table_flush(&tbl, part);
419 ViceLog(0, ("VVGC_scan_partition: error %d while scanning %s\n",
422 ViceLog(0, ("VVGC_scan_partition: finished scanning %s: %lu volumes in %lu groups\n",
423 part_path, tbl.newvols, tbl.newvgs));
428 _VVGC_flush_dlist(part);
429 free(VVGCache.part[part->index].dlist_hash_buckets);
430 VVGCache.part[part->index].dlist_hash_buckets = NULL;
433 _VVGC_state_change(part, VVGC_PART_STATE_INVALID);
435 _VVGC_state_change(part, VVGC_PART_STATE_VALID);
447 _VVGC_scanner_thread(void * args)
449 struct DiskPartition64 *part = args;
452 code = _VVGC_scan_partition(part);
454 ViceLog(0, ("Error: _VVGC_scan_partition failed with code %d for partition %s\n",
455 code, VPartitionPath(part)));
462 * start a background scan.
464 * @param[in] dp disk partition object
466 * @return operation status
468 * @retval -1 internal error
469 * @retval -3 racing against another thread
474 _VVGC_scan_start(struct DiskPartition64 * dp)
478 pthread_attr_t attrs;
481 if (_VVGC_state_change(dp,
482 VVGC_PART_STATE_UPDATING)
483 == VVGC_PART_STATE_UPDATING) {
485 ViceLog(0, ("VVGC_scan_partition: race detected; aborting scanning partition %s\n",
486 VPartitionPath(dp)));
491 /* initialize partition's to-delete list */
492 VVGCache.part[dp->index].dlist_hash_buckets =
493 malloc(VolumeHashTable.Size * sizeof(struct rx_queue));
494 if (!VVGCache.part[dp->index].dlist_hash_buckets) {
498 for (i = 0; i < VolumeHashTable.Size; i++) {
499 queue_Init(&VVGCache.part[dp->index].dlist_hash_buckets[i]);
502 code = pthread_attr_init(&attrs);
507 code = pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
512 code = pthread_create(&tid, &attrs, &_VVGC_scanner_thread, dp);
515 VVGCache_part_state_t old_state;
517 ViceLog(0, ("_VVGC_scan_start: pthread_create failed with %d\n", code));
519 old_state = _VVGC_state_change(dp, VVGC_PART_STATE_INVALID);
520 assert(old_state == VVGC_PART_STATE_UPDATING);
525 ViceLog(0, ("_VVGC_scan_start failed with code %d for partition %s\n",
526 code, VPartitionPath(dp)));
527 if (VVGCache.part[dp->index].dlist_hash_buckets) {
528 free(VVGCache.part[dp->index].dlist_hash_buckets);
529 VVGCache.part[dp->index].dlist_hash_buckets = NULL;
537 * looks up an entry on the to-delete list, if it exists.
539 * @param[in] dp the partition whose dlist we are looking at
540 * @param[in] parent the parent volume ID we're looking for
541 * @param[in] child the child volume ID we're looking for
543 * @return a pointer to the entry in the dlist for that entry
544 * @retval NULL the requested entry does not exist in the dlist
546 static VVGCache_dlist_entry_t *
547 _VVGC_dlist_lookup_r(struct DiskPartition64 *dp, VolumeId parent,
550 int bucket = VVGC_HASH(child);
551 VVGCache_dlist_entry_t *ent, *nent;
553 for (queue_Scan(&VVGCache.part[dp->index].dlist_hash_buckets[bucket],
555 VVGCache_dlist_entry)) {
557 if (ent->child == child && ent->parent == parent) {
566 * delete all of the entries in the dlist from the VGC.
568 * Traverses the to-delete list for the specified partition, and deletes
569 * the specified entries from the global VGC. Also deletes the entries from
570 * the dlist itself as it goes along.
572 * @param[in] dp the partition whose dlist we are flushing
575 _VVGC_flush_dlist(struct DiskPartition64 *dp)
578 VVGCache_dlist_entry_t *ent, *nent;
580 for (i = 0; i < VolumeHashTable.Size; i++) {
581 for (queue_Scan(&VVGCache.part[dp->index].dlist_hash_buckets[i],
583 VVGCache_dlist_entry)) {
585 _VVGC_entry_purge_r(dp, ent->parent, ent->child);
593 * add a VGC entry to the partition's to-delete list.
595 * This adds a VGC entry (a parent/child pair) to a list of VGC entries to
596 * be deleted from the VGC at the end of a VGC scan. This is necessary because,
597 * while a VGC scan is occurring, volumes may be deleted. Since a VGC scan
598 * scans a partition in VVGC_SCAN_TBL_LEN chunks, a VGC delete operation
599 * may delete a volume, only for it to be added again when the VGC scan's
600 * table adds it to the VGC. So when a VGC entry is deleted and a VGC scan
601 * is running, this function must be called to ensure it does not come
604 * @param[in] dp the partition to whose dlist we are adding
605 * @param[in] parent the parent volumeID of the VGC entry
606 * @param[in] child the child volumeID of the VGC entry
608 * @return operation status
610 * @retval ENOMEM memory allocation error
612 * @pre VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING
614 * @internal VGC use only
617 _VVGC_dlist_add_r(struct DiskPartition64 *dp, VolumeId parent,
620 int bucket = VVGC_HASH(child);
621 VVGCache_dlist_entry_t *entry;
623 entry = malloc(sizeof(*entry));
628 entry->child = child;
629 entry->parent = parent;
631 queue_Append(&VVGCache.part[dp->index].dlist_hash_buckets[bucket],
637 * delete a VGC entry from the partition's to-delete list.
639 * When a VGC scan is occurring, and a volume is removed, but then created
640 * again, we need to ensure that it does not get deleted from being on the
641 * dlist. Call this function whenever adding a new entry to the VGC during
642 * a VGC scan to ensure it doesn't get deleted later.
644 * @param[in] dp the partition from whose dlist we are deleting
645 * @param[in] parent the parent volumeID of the VGC entry
646 * @param[in] child the child volumeID of the VGC entry
648 * @return operation status
650 * @retval ENOENT the specified VGC entry is not on the dlist
652 * @pre VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING
654 * @internal VGC use only
656 * @see _VVGC_dlist_add_r
659 _VVGC_dlist_del_r(struct DiskPartition64 *dp, VolumeId parent,
662 VVGCache_dlist_entry_t *ent;
664 ent = _VVGC_dlist_lookup_r(dp, parent, child);
675 #endif /* AFS_DEMAND_ATTACH_FS */