2 * Copyright 2009-2010, Sine Nomine Associates and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * volume group membership cache
13 * asynchronous partition scanner
16 #include <afsconfig.h>
17 #include <afs/param.h>
21 #ifdef HAVE_SYS_FILE_H
25 #ifdef AFS_DEMAND_ATTACH_FS
29 #include <afs/afsutil.h>
32 #include <afs/afsint.h>
36 #include "viceinode.h"
38 #include "partition.h"
39 #include <afs/errors.h>
41 #define __VOL_VG_CACHE_IMPL 1
44 #include "vg_cache_impl.h"
46 static int _VVGC_scan_table_init(VVGCache_scan_table_t * tbl);
47 static int _VVGC_scan_table_add(VVGCache_scan_table_t * tbl,
48 struct DiskPartition64 * dp,
51 static int _VVGC_scan_table_flush(VVGCache_scan_table_t * tbl,
52 struct DiskPartition64 * dp);
53 static void * _VVGC_scanner_thread(void *);
54 static int _VVGC_scan_partition(struct DiskPartition64 * part);
55 static VVGCache_dlist_entry_t * _VVGC_dlist_lookup_r(struct DiskPartition64 *dp,
58 static void _VVGC_flush_dlist(struct DiskPartition64 *dp);
61 * init a thread-local scan table.
63 * @param[in] tbl scan table
65 * @return operation status
71 _VVGC_scan_table_init(VVGCache_scan_table_t * tbl)
73 memset(tbl, 0, sizeof(*tbl));
79 * add an entry to the thread-local scan table.
81 * @param[in] tbl scan table
82 * @param[in] dp disk partition object
83 * @param[in] volid volume id
84 * @param[in] parent parent volume id
86 * @pre VOL_LOCK is NOT held
88 * @note if the table is full, this routine will acquire
89 * VOL_LOCK and flush the table to the global one.
91 * @return operation status
93 * @retval nonzero a VVGCache_entry_add_r operation failed during a
94 * flush of the thread-local table
99 _VVGC_scan_table_add(VVGCache_scan_table_t * tbl,
100 struct DiskPartition64 * dp,
106 if (tbl->idx == VVGC_SCAN_TBL_LEN) {
107 code = _VVGC_scan_table_flush(tbl, dp);
110 tbl->entries[tbl->idx].volid = volid;
111 tbl->entries[tbl->idx].parent = parent;
118 * flush thread-local scan table to the global VG cache.
120 * @param[in] tbl scan table
121 * @param[in] dp disk partition object
123 * @pre VOL_LOCK is NOT held
125 * @return operation status
127 * @retval nonzero a VVGCache_entry_add_r operation failed during a
128 * flush of the thread-local table
133 _VVGC_scan_table_flush(VVGCache_scan_table_t * tbl,
134 struct DiskPartition64 * dp)
136 int code = 0, res, i;
138 unsigned long newvols, newvgs;
140 newvols = tbl->newvols;
141 newvgs = tbl->newvgs;
145 for (i = 0; i < tbl->idx; i++) {
147 * We need to check the 'to-delete' list and prevent adding any entries
148 * that are on it. The volser could potentially create a volume in one
149 * VG, then delete it and put it on another VG. If we are doing a scan
150 * when that happens, tbl->entries could have the entries for trying to
151 * put the vol on both VGs, though at least one of them will also be on
152 * the dlist. If we put everything in tbl->entries on the VGC then try
153 * to delete afterwards, putting one entry on the VGC will cause an error,
154 * and we'll fail to add it. So instead, avoid adding any new VGC
155 * entries if it is on the dlist.
157 if (_VVGC_dlist_lookup_r(dp, tbl->entries[i].parent,
158 tbl->entries[i].volid)) {
161 res = VVGCache_entry_add_r(dp,
162 tbl->entries[i].parent,
163 tbl->entries[i].volid,
173 /* flush the to-delete list while we're here. We don't need to preserve
174 * the list across the entire scan, and flushing it each time we flush
175 * a scan table will keep the size of the dlist down */
176 _VVGC_flush_dlist(dp);
180 ViceLog(125, ("VVGC_scan_table_flush: flushed %d entries from "
181 "scan table to global VG cache\n", tbl->idx));
182 ViceLog(125, ("VVGC_scan_table_flush: %s total: %lu vols, %lu groups\n",
183 VPartitionPath(dp), newvols, newvgs));
185 res = _VVGC_scan_table_init(tbl);
190 tbl->newvols = newvols;
191 tbl->newvgs = newvgs;
197 * record a volume header found by VWalkVolumeHeaders in a VGC scan table.
199 * @param[in] dp the disk partition
200 * @param[in] name full path to the .vol header (unused)
201 * @param[in] hdr the header data
202 * @param[in] last whether this is the last try or not (unused)
203 * @param[in] rock actually a VVGCache_scan_table_t* to add the volume to
205 * @return operation status
207 * @retval -1 fatal error adding vol to the scan table
210 _VVGC_RecordHeader(struct DiskPartition64 *dp, const char *name,
211 struct VolumeDiskHeader *hdr, int last, void *rock)
214 VVGCache_scan_table_t *tbl;
215 tbl = (VVGCache_scan_table_t *)rock;
217 code = _VVGC_scan_table_add(tbl, dp, hdr->id, hdr->parent);
219 ViceLog(0, ("VVGC_scan_partition: error %d adding volume %s to scan table\n",
227 * unlink a faulty volume header found by VWalkVolumeHeaders.
229 * @param[in] dp the disk partition (unused)
230 * @param[in] name the full path to the .vol header
231 * @param[in] hdr the header data (unused)
232 * @param[in] rock unused
235 _VVGC_UnlinkHeader(struct DiskPartition64 *dp, const char *name,
236 struct VolumeDiskHeader *hdr, void *rock)
238 ViceLog(0, ("%s is not a legitimate volume header file; deleted\n", name));
240 ViceLog(0, ("Unable to unlink %s (errno = %d)\n",
246 * scan a disk partition for .vol files
248 * @param[in] part disk partition object
250 * @pre VOL_LOCK is NOT held
252 * @return operation status
254 * @retval -1 invalid disk partition object
255 * @retval -2 failed to flush stale entries for this partition
260 _VVGC_scan_partition(struct DiskPartition64 * part)
264 VVGCache_scan_table_t tbl;
265 char *part_path = NULL;
267 code = _VVGC_scan_table_init(&tbl);
269 ViceLog(0, ("VVGC_scan_partition: could not init scan table; error = %d\n",
273 part_path = VPartitionPath(part);
274 if (part_path == NULL) {
275 ViceLog(0, ("VVGC_scan_partition: invalid partition object given; aborting scan\n"));
281 res = _VVGC_flush_part_r(part);
283 ViceLog(0, ("VVGC_scan_partition: error flushing partition %s; error = %d\n",
284 VPartitionPath(part), res));
292 dirp = opendir(part_path);
294 ViceLog(0, ("VVGC_scan_partition: could not open %s, aborting scan; error = %d\n",
300 ViceLog(5, ("VVGC_scan_partition: scanning partition %s for VG cache\n",
303 code = VWalkVolumeHeaders(part, part_path, _VVGC_RecordHeader,
304 _VVGC_UnlinkHeader, &tbl);
309 _VVGC_scan_table_flush(&tbl, part);
317 ViceLog(0, ("VVGC_scan_partition: error %d while scanning %s\n",
320 ViceLog(0, ("VVGC_scan_partition: finished scanning %s: %lu volumes in %lu groups\n",
321 part_path, tbl.newvols, tbl.newvgs));
326 _VVGC_flush_dlist(part);
327 free(VVGCache.part[part->index].dlist_hash_buckets);
328 VVGCache.part[part->index].dlist_hash_buckets = NULL;
331 _VVGC_state_change(part, VVGC_PART_STATE_INVALID);
333 _VVGC_state_change(part, VVGC_PART_STATE_VALID);
345 _VVGC_scanner_thread(void * args)
347 struct DiskPartition64 *part = args;
350 code = _VVGC_scan_partition(part);
352 ViceLog(0, ("Error: _VVGC_scan_partition failed with code %d for partition %s\n",
353 code, VPartitionPath(part)));
360 * start a background scan.
362 * @param[in] dp disk partition object
364 * @return operation status
366 * @retval -1 internal error
367 * @retval -3 racing against another thread
372 _VVGC_scan_start(struct DiskPartition64 * dp)
376 pthread_attr_t attrs;
379 if (_VVGC_state_change(dp,
380 VVGC_PART_STATE_UPDATING)
381 == VVGC_PART_STATE_UPDATING) {
383 ViceLog(0, ("VVGC_scan_partition: race detected; aborting scanning partition %s\n",
384 VPartitionPath(dp)));
389 /* initialize partition's to-delete list */
390 VVGCache.part[dp->index].dlist_hash_buckets =
391 malloc(VolumeHashTable.Size * sizeof(struct rx_queue));
392 if (!VVGCache.part[dp->index].dlist_hash_buckets) {
396 for (i = 0; i < VolumeHashTable.Size; i++) {
397 queue_Init(&VVGCache.part[dp->index].dlist_hash_buckets[i]);
400 code = pthread_attr_init(&attrs);
405 code = pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
410 code = pthread_create(&tid, &attrs, &_VVGC_scanner_thread, dp);
413 VVGCache_part_state_t old_state;
415 ViceLog(0, ("_VVGC_scan_start: pthread_create failed with %d\n", code));
417 old_state = _VVGC_state_change(dp, VVGC_PART_STATE_INVALID);
418 opr_Assert(old_state == VVGC_PART_STATE_UPDATING);
423 ViceLog(0, ("_VVGC_scan_start failed with code %d for partition %s\n",
424 code, VPartitionPath(dp)));
425 if (VVGCache.part[dp->index].dlist_hash_buckets) {
426 free(VVGCache.part[dp->index].dlist_hash_buckets);
427 VVGCache.part[dp->index].dlist_hash_buckets = NULL;
435 * looks up an entry on the to-delete list, if it exists.
437 * @param[in] dp the partition whose dlist we are looking at
438 * @param[in] parent the parent volume ID we're looking for
439 * @param[in] child the child volume ID we're looking for
441 * @return a pointer to the dlist entry matching the given parent/child pair
442 * @retval NULL the requested entry does not exist in the dlist
444 static VVGCache_dlist_entry_t *
445 _VVGC_dlist_lookup_r(struct DiskPartition64 *dp, VolumeId parent,
448 int bucket = VVGC_HASH(child);
449 VVGCache_dlist_entry_t *ent, *nent;
451 for (queue_Scan(&VVGCache.part[dp->index].dlist_hash_buckets[bucket],
453 VVGCache_dlist_entry)) {
455 if (ent->child == child && ent->parent == parent) {
464 * delete all of the entries in the dlist from the VGC.
466 * Traverses the to-delete list for the specified partition, and deletes
467 * the specified entries from the global VGC. Also deletes the entries from
468 * the dlist itself as it goes along.
470 * @param[in] dp the partition whose dlist we are flushing
473 _VVGC_flush_dlist(struct DiskPartition64 *dp)
476 VVGCache_dlist_entry_t *ent, *nent;
478 for (i = 0; i < VolumeHashTable.Size; i++) {
479 for (queue_Scan(&VVGCache.part[dp->index].dlist_hash_buckets[i],
481 VVGCache_dlist_entry)) {
483 _VVGC_entry_purge_r(dp, ent->parent, ent->child);
491 * add a VGC entry to the partition's to-delete list.
493 * This adds a VGC entry (a parent/child pair) to a list of VGC entries to
494 * be deleted from the VGC at the end of a VGC scan. This is necessary because,
495 * while a VGC scan is occurring, volumes may be deleted. Since a VGC scan
496 * scans a partition in VVGC_SCAN_TBL_LEN chunks, a VGC delete operation
497 * may delete a volume, only for it to be added again when the VGC scan's
498 * table adds it to the VGC. So when a VGC entry is deleted and a VGC scan
499 * is running, this function must be called to ensure it does not come
502 * @param[in] dp the partition to whose dlist we are adding
503 * @param[in] parent the parent volumeID of the VGC entry
504 * @param[in] child the child volumeID of the VGC entry
506 * @return operation status
508 * @retval ENOMEM memory allocation error
510 * @pre VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING
512 * @internal VGC use only
515 _VVGC_dlist_add_r(struct DiskPartition64 *dp, VolumeId parent,
518 int bucket = VVGC_HASH(child);
519 VVGCache_dlist_entry_t *entry;
521 entry = malloc(sizeof(*entry));
526 entry->child = child;
527 entry->parent = parent;
529 queue_Append(&VVGCache.part[dp->index].dlist_hash_buckets[bucket],
535 * delete a VGC entry from the partition's to-delete list.
537 * When a VGC scan is occurring, and a volume is removed, but then created
538 * again, we need to ensure that it does not get deleted because it is still on the
539 * dlist. Call this function whenever adding a new entry to the VGC during
540 * a VGC scan to ensure it doesn't get deleted later.
542 * @param[in] dp the partition from whose dlist we are deleting
543 * @param[in] parent the parent volumeID of the VGC entry
544 * @param[in] child the child volumeID of the VGC entry
546 * @return operation status
548 * @retval ENOENT the specified VGC entry is not on the dlist
550 * @pre VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING
552 * @internal VGC use only
554 * @see _VVGC_dlist_add_r
557 _VVGC_dlist_del_r(struct DiskPartition64 *dp, VolumeId parent,
560 VVGCache_dlist_entry_t *ent;
562 ent = _VVGC_dlist_lookup_r(dp, parent, child);
573 #endif /* AFS_DEMAND_ATTACH_FS */