/******************************************************************************
 * A fully recycling epoch-based garbage collector. Works by counting
 * threads in and out of critical regions, to work out when
 * garbage queues can be fully deleted.
 *
 * Copyright (c) 2001-2003, K A Fraser
 *
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
    * notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above
    * copyright notice, this list of conditions and the following
    * disclaimer in the documentation and/or other materials provided
    * with the distribution.  Neither the name of the Keir Fraser
    * nor the names of its contributors may be used to endorse or
    * promote products derived from this software without specific
    * prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *****************************************************************************/

#include "portable_defns.h"

#include <afsconfig.h>
#include <afs/param.h>
#include <afs/afsutil.h>

/*#define MINIMAL_GC*/
/*#define YIELD_TO_HELP_PROGRESS*/

/* Recycled nodes are filled with this value if WEAK_MEM_ORDER. */
#define INVALID_BYTE 0
#define INITIALISE_NODES(_p,_c) memset((_p), INVALID_BYTE, (_c));
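
/*
 * The scrub above pairs with the extra epoch described further down: garbage
 * is zero-filled one epoch before it is handed back to the allocation lists,
 * so by the time a block is reused every thread has had a chance to observe
 * the zeroed contents rather than stale ones.
 */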

/* Number of unique block sizes we can deal with. Equivalently, the
 * number of unique object caches which can be created. */

/*
 * The initial number of allocation chunks for each per-blocksize list.
 * Popular allocation lists will steadily increase the allocation unit
 * in line with demand.
 */
#define ALLOC_CHUNKS_PER_LIST 10

/*
 * How many times should a thread call gc_enter(), seeing the same epoch
 * each time, before it makes a reclaim attempt?
 */
#define ENTRIES_PER_RECLAIM_ATTEMPT 100
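
/*
 * This value trades reclamation latency for fast-path overhead: a larger
 * value makes gc_enter() attempt gc_reclaim() less often, but garbage then
 * sits on the per-thread lists for longer before it can be recycled.
 */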

/*
 * 0: current epoch -- threads are moving to this;
 * -1: some threads may still throw garbage into this epoch;
 * -2: no threads can see this epoch => we can zero garbage lists;
 * -3: all threads see zeros in these garbage lists => move to alloc lists.
 */
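
/*
 * Garbage is segregated by epoch, with lists indexed modulo NR_EPOCHS, so a
 * slot is only reused for new garbage once the collector has stepped all the
 * way around the ring; gc_reclaim() below picks out the aged slots with
 * modular arithmetic on the current epoch.
 */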

/*
 * A chunk amortises the cost of allocation from shared lists. It also
 * helps when zeroing nodes, as it increases per-cacheline pointer density
 * and means that node locations don't need to be brought into the cache
 * (most architectures have a non-temporal store instruction).
 */
#define BLKS_PER_CHUNK 100
typedef struct chunk_st chunk_t;
struct chunk_st
{
    chunk_t     *next;              /* chunk chaining */
    unsigned int i;                 /* the next entry in blk[] to use */
    void        *blk[BLKS_PER_CHUNK];
};
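
/*
 * Chunks are linked into circular singly-linked lists throughout this file
 * (the shared free list, the per-size allocation lists, the per-thread
 * garbage and hook lists), so traversals finish by arriving back at their
 * starting chunk, e.g. "while ( (p = p->next) != h );".
 */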

static struct gc_global_st
{
    /* The current epoch. */
    VOLATILE unsigned int current;

    /* Exclusive access to gc_reclaim(). */
    VOLATILE unsigned int inreclaim;

    /* Allocator caches currently defined */
    VOLATILE unsigned long n_allocators;

    /*
     * RUN-TIME CONSTANTS (to first approximation)
     */

    /* Memory page size, in bytes. */
    unsigned int page_size;

    /* Node sizes (run-time constants). */
    int nr_sizes;
    int blk_sizes[MAX_SIZES];

    /* tags (trace support) */
    char *tags[MAX_SIZES];

    /* Registered epoch hooks. */
    int nr_hooks;
    hook_fn_t hook_fns[MAX_HOOKS];

    /*
     * DATA WE MAY HIT HARD
     */

    /* Chain of free, empty chunks. */
    chunk_t * VOLATILE free_chunks;

    /* Main allocation lists. */
    chunk_t * VOLATILE alloc[MAX_SIZES];
    VOLATILE unsigned int alloc_size[MAX_SIZES];

    VOLATILE unsigned int total_size;
    VOLATILE unsigned int allocations;
} gc_global;

/* Per-thread state. */
struct gc_st
{
    /* Epoch that this thread sees. */
    unsigned int epoch;

    /* Number of calls to gc_enter() since last gc_reclaim() attempt. */
    unsigned int entries_since_reclaim;

#ifdef YIELD_TO_HELP_PROGRESS
    /* Number of calls to gc_reclaim() since we last yielded. */
    unsigned int reclaim_attempts_since_yield;
#endif

    /* Used by gc_async_barrier(). */
    void *async_page;
    int async_page_state;

    chunk_t *garbage[NR_EPOCHS][MAX_SIZES];
    chunk_t *garbage_tail[NR_EPOCHS][MAX_SIZES];
    chunk_t *chunk_cache;

    /* Local allocation lists. */
    chunk_t *alloc[MAX_SIZES];
    unsigned int alloc_chunks[MAX_SIZES];

    /* Hook pointer lists. */
    chunk_t *hook[NR_EPOCHS][MAX_HOOKS];
};

#define MEM_FAIL(_s)                                                          \
do {                                                                          \
    fprintf(stderr, "OUT OF MEMORY: %d bytes at line %d\n", (_s), __LINE__);  \
    abort();                                                                  \
} while ( 0 )

/* Allocate more empty chunks from the heap. */
#define CHUNKS_PER_ALLOC 1000
static chunk_t *alloc_more_chunks(void)
{
    int i;
    chunk_t *h, *p;

    ViceLog(11, ("GC: alloc_more_chunks alloc %lu chunks\n",
                 CHUNKS_PER_ALLOC));

    h = p = ALIGNED_ALLOC(CHUNKS_PER_ALLOC * sizeof(*h));
    if ( h == NULL ) MEM_FAIL(CHUNKS_PER_ALLOC * sizeof(*h));

    for ( i = 1; i < CHUNKS_PER_ALLOC; i++ )
    {
        p->next = p + 1;
        p++;
    }

    p->next = h;

    return(h);
}

/* Put a chain of chunks onto a list. */
static void add_chunks_to_list(chunk_t *ch, chunk_t *head)
{
    chunk_t *h_next, *new_h_next, *ch_next;
    ch_next    = ch->next;
    new_h_next = head->next;
    do { ch->next = h_next = new_h_next; WMB_NEAR_CAS(); }
    while ( (new_h_next = CASPO(&head->next, h_next, ch_next)) != h_next );
}
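
/*
 * The splice above hangs an incoming circular chain off @head with a single
 * CAS: @ch's next pointer is aimed at the chunk currently after @head, and
 * head->next is then swung to the chunk that used to follow @ch. If another
 * thread changes head->next in the meantime, the CAS fails and the loop
 * retries against the freshly observed value.
 */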

/* Allocate a chain of @n empty chunks. Pointers may be garbage. */
static chunk_t *get_empty_chunks(int n)
{
    chunk_t *new_rh, *rh, *rt, *head;

    head = gc_global.free_chunks;

    WEAK_DEP_ORDER_RMB();
    for ( i = 0; i < n; i++ )
        if ( (rt = rt->next) == head )
            /* Allocate some more chunks. */
            add_chunks_to_list(alloc_more_chunks(), head);

    while ( (new_rh = CASPO(&head->next, rh, rt->next)) != rh );

/* Get @n filled chunks, pointing at blocks of @sz bytes each. */
static chunk_t *get_filled_chunks(int n, int sz)
{
    ADD_TO(gc_global.total_size, n * BLKS_PER_CHUNK * sz);
    ADD_TO(gc_global.allocations, 1);

    node = ALIGNED_ALLOC(n * BLKS_PER_CHUNK * sz);
    if ( node == NULL ) MEM_FAIL(n * BLKS_PER_CHUNK * sz);
#ifdef WEAK_MEM_ORDER
    INITIALISE_NODES(node, n * BLKS_PER_CHUNK * sz);
#endif

    h = p = get_empty_chunks(n);
        p->i = BLKS_PER_CHUNK;
        for ( i = 0; i < BLKS_PER_CHUNK; i++ )
    while ( (p = p->next) != h );

/*
 * gc_async_barrier: Cause an asynchronous barrier in all other threads. We do
 * this by causing a TLB shootdown to be propagated to all other processors.
 * Each time such an action is required, this function calls:
 *   mprotect(async_page, <page size>, <new flags>)
 * Each thread's state contains a memory page dedicated for this purpose.
 */
#ifdef WEAK_MEM_ORDER
static void gc_async_barrier(gc_t *gc)
{
    mprotect(gc->async_page, gc_global.page_size,
             gc->async_page_state ? PROT_READ : PROT_NONE);
    gc->async_page_state = !gc->async_page_state;
}
#else
#define gc_async_barrier(_g) ((void)0)
#endif
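
/*
 * On strongly-ordered processors no shootdown is needed, which is why the
 * non-WEAK_MEM_ORDER build compiles the barrier away to a no-op above.
 */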

/* Grab a level @i allocation chunk from main chain. */
static chunk_t *get_alloc_chunk(gc_t *gc, int i)
{
    chunk_t *alloc, *p, *new_p, *nh;

    alloc = gc_global.alloc[i];

        sz = gc_global.alloc_size[i];
        nh = get_filled_chunks(sz, gc_global.blk_sizes[i]);
        ADD_TO(gc_global.alloc_size[i], sz >> 3);
        gc_async_barrier(gc);
        add_chunks_to_list(nh, alloc);

        WEAK_DEP_ORDER_RMB();
    while ( (new_p = CASPO(&alloc->next, p, p->next)) != p );

    assert(p->i == BLKS_PER_CHUNK);

#ifndef MINIMAL_GC
/*
 * gc_reclaim: Scans the list of struct gc_perthread looking for the lowest
 * maximum epoch number seen by a thread that's in the list code. If it's the
 * current epoch, the "nearly-free" lists from the previous epoch are
 * reclaimed, and the epoch is incremented.
 */
static void gc_reclaim(void)
{
    ptst_t *ptst, *first_ptst, *our_ptst = NULL;
    gc_t *gc;
    unsigned long curr_epoch;
    chunk_t *ch, *t;
    int two_ago, three_ago, i, j;

    ViceLog(11, ("GC: gc_reclaim enter\n"));

    /* Barrier to entering the reclaim critical section. */
    if ( gc_global.inreclaim || CASIO(&gc_global.inreclaim, 0, 1) ) return;

    ViceLog(11, ("GC: gc_reclaim after inreclaim barrier\n"));

    /*
     * Grab first ptst structure *before* barrier -- prevent bugs
     * on weak-ordered architectures.
     */
    first_ptst = ptst_first();

    curr_epoch = gc_global.current;

    /* Have all threads seen the current epoch, or not in mutator code? */
    for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) )
    {
        if ( (ptst->count > 1) && (ptst->gc->epoch != curr_epoch) ) goto out;
    }

    ViceLog(11, ("GC: gc_reclaim all-threads see current epoch\n"));

    /*
     * Three-epoch-old garbage lists move to allocation lists.
     * Two-epoch-old garbage lists are cleaned out.
     */
    two_ago   = (curr_epoch+2) % NR_EPOCHS;
    three_ago = (curr_epoch+1) % NR_EPOCHS;
    if ( gc_global.nr_hooks != 0 )
        our_ptst = (ptst_t *)pthread_getspecific(ptst_key);
    for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) )
    {
        gc = ptst->gc;

        for ( i = 0; i < gc_global.nr_sizes; i++ )
        {
#ifdef WEAK_MEM_ORDER
            int sz = gc_global.blk_sizes[i];
            if ( gc->garbage[two_ago][i] != NULL )
            {
                chunk_t *head = gc->garbage[two_ago][i];
                ch = head;
                do {
                    for ( j = 0; j < ch->i; j++ )
                        INITIALISE_NODES(ch->blk[j], sz);
                }
                while ( (ch = ch->next) != head );
            }
#endif

            /* NB. Leave one chunk behind, as it is probably not yet full. */
            t = gc->garbage[three_ago][i];
            if ( (t == NULL) || ((ch = t->next) == t) ) continue;
            gc->garbage_tail[three_ago][i]->next = ch;
            gc->garbage_tail[three_ago][i] = t;

            /* gc inst: compute and log size of returned list */
            {
                chunk_t *ch_head, *ch_next;
                int r_ix, r_len, r_size;
                r_ix = 0;
                r_len = 0;
                r_size = 0;

                /* XXX: nonfatal, may be missing multiplier */
                ch_head = ch;
                do {
                    r_len++;
                } while (ch->next && (ch->next != ch_head)
                         && (ch_next = ch->next));

                ViceLog(11, ("GC: return %d chunks of size %d to "
                             "gc_global.alloc[%d]\n",
                             r_len,
                             gc_global.blk_sizes[i],
                             i));
            }

            add_chunks_to_list(ch, gc_global.alloc[i]);
        }

        for ( i = 0; i < gc_global.nr_hooks; i++ )
        {
            hook_fn_t fn = gc_global.hook_fns[i];
            ch = gc->hook[three_ago][i];
            if ( ch == NULL ) continue;
            gc->hook[three_ago][i] = NULL;

            t = ch;
            do { for ( j = 0; j < t->i; j++ ) fn(our_ptst, t->blk[j]); }
            while ( (t = t->next) != ch );

            /* gc inst: compute and log size of returned list */
            {
                chunk_t *ch_head, *ch_next;
                int r_ix, r_len, r_size;
                r_ix = 0;
                r_len = 0;
                r_size = 0;

                /* XXX: nonfatal, may be missing multiplier */
                ch_head = ch;
                do {
                    r_len++;
                } while (ch->next && (ch->next != ch_head)
                         && (ch_next = ch->next));

                ViceLog(11, ("GC: return %d chunks to gc_global.free_chunks\n",
                             r_len));
            }

            add_chunks_to_list(ch, gc_global.free_chunks);
        }
    }

    /* Update current epoch. */
    ViceLog(11, ("GC: gc_reclaim epoch transition (leaving %lu)\n",
                 curr_epoch));

    gc_global.current = (curr_epoch+1) % NR_EPOCHS;

 out:
    gc_global.inreclaim = 0;
}
#endif /* MINIMAL_GC */

void *gc_alloc(ptst_t *ptst, int alloc_id)
{
    gc_t *gc = ptst->gc;
    chunk_t *ch;

    ch = gc->alloc[alloc_id];
    if ( ch->i == 0 )
    {
        if ( gc->alloc_chunks[alloc_id]++ == 100 )
        {
            gc->alloc_chunks[alloc_id] = 0;
            add_chunks_to_list(ch, gc_global.free_chunks);
            gc->alloc[alloc_id] = ch = get_alloc_chunk(gc, alloc_id);
        }
        else
        {
            chunk_t *och = ch;
            ch = get_alloc_chunk(gc, alloc_id);
            ch->next  = och->next;
            och->next = ch;
            gc->alloc[alloc_id] = ch;
        }
    }

    return ch->blk[--ch->i];
}
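
/*
 * Illustrative call pattern (a sketch, not code shipped in this file). The
 * ptst_t argument comes from the accompanying per-thread-state layer (see
 * ptst.h), which normally brackets these calls for its users; "my_id",
 * "struct my_node" and "n" are hypothetical names used only for this example.
 *
 *     int my_id;
 *     struct my_node *n;
 *
 *     _init_gc_subsystem();
 *     my_id = gc_add_allocator(sizeof(struct my_node), "my_node");
 *
 *     gc_enter(ptst);                  -- begin critical region
 *     n = gc_alloc(ptst, my_id);       -- block of the registered size
 *     ...
 *     gc_free(ptst, n, my_id);         -- queued on this epoch's garbage list
 *     gc_exit(ptst);                   -- end critical region
 *
 * The freed block is not reused until gc_reclaim() has aged it through the
 * epochs described at the top of this file.
 */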

int
gc_get_blocksize(int alloc_id)
{
    return (gc_global.blk_sizes[alloc_id]);
}

char *
gc_get_tag(int alloc_id)
{
    return (gc_global.tags[alloc_id]);
}

static chunk_t *chunk_from_cache(gc_t *gc)
{
    chunk_t *ch = gc->chunk_cache, *p = ch->next;

        gc->chunk_cache = get_empty_chunks(100);

void gc_free(ptst_t *ptst, void *p, int alloc_id)
{
    gc_t *gc = ptst->gc;
    chunk_t *prev, *new, *ch = gc->garbage[gc->epoch][alloc_id];

    if ( ch == NULL )
    {
        gc->garbage[gc->epoch][alloc_id] = ch = chunk_from_cache(gc);
        gc->garbage_tail[gc->epoch][alloc_id] = ch;
    }
    else if ( ch->i == BLKS_PER_CHUNK )
    {
        prev = gc->garbage_tail[gc->epoch][alloc_id];
        new = chunk_from_cache(gc);
        gc->garbage[gc->epoch][alloc_id] = new;
        new->next  = ch;
        prev->next = new;
        ch = new;
    }

    ch->blk[ch->i++] = p;
}

void gc_add_ptr_to_hook_list(ptst_t *ptst, void *ptr, int hook_id)
{
    gc_t *gc = ptst->gc;
    chunk_t *och, *ch = gc->hook[gc->epoch][hook_id];

    if ( ch == NULL )
    {
        gc->hook[gc->epoch][hook_id] = ch = chunk_from_cache(gc);
    }
    else
    {
        ch = ch->next;
        if ( ch->i == BLKS_PER_CHUNK )
        {
            och = gc->hook[gc->epoch][hook_id];
            ch = chunk_from_cache(gc);
            ch->next = och->next;
            och->next = ch;
        }
    }

    ch->blk[ch->i++] = ptr;
}

void gc_unsafe_free(ptst_t *ptst, void *p, int alloc_id)
{
    gc_t *gc = ptst->gc;
    chunk_t *ch;

    ch = gc->alloc[alloc_id];
    if ( ch->i < BLKS_PER_CHUNK )
    {
        ch->blk[ch->i++] = p;
    }
    else
    {
        gc_free(ptst, p, alloc_id);
    }
}

void gc_enter(ptst_t *ptst)
{

    new_epoch = gc_global.current;
    if ( gc->epoch != new_epoch )
    {
        gc->epoch = new_epoch;
        gc->entries_since_reclaim = 0;
#ifdef YIELD_TO_HELP_PROGRESS
        gc->reclaim_attempts_since_yield = 0;
#endif
    }
    else if ( gc->entries_since_reclaim++ == 100 )
    {
#ifdef YIELD_TO_HELP_PROGRESS
        if ( gc->reclaim_attempts_since_yield++ == 10000 )
            gc->reclaim_attempts_since_yield = 0;
#endif
        gc->entries_since_reclaim = 0;

void gc_exit(ptst_t *ptst)

gc_t *gc_init(void)
{
    gc_t *gc;
    int i;

    gc = ALIGNED_ALLOC(sizeof(*gc));
    if ( gc == NULL ) MEM_FAIL(sizeof(*gc));
    memset(gc, 0, sizeof(*gc));

#ifdef WEAK_MEM_ORDER
    /* Initialise shootdown state. */
    gc->async_page = mmap(NULL, gc_global.page_size, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if ( gc->async_page == (void *)MAP_FAILED ) MEM_FAIL(gc_global.page_size);
    gc->async_page_state = 1;
#endif

    gc->chunk_cache = get_empty_chunks(100);

    /* Get ourselves a set of allocation chunks. */
    for ( i = 0; i < gc_global.nr_sizes; i++ )
    {
        gc->alloc[i] = get_alloc_chunk(gc, i);
    }
    for ( ; i < MAX_SIZES; i++ )
    {
        gc->alloc[i] = chunk_from_cache(gc);
    }

    return(gc);
}

int
gc_add_allocator(int alloc_size, char *tag)
{
    int ni, i;

    FASPO(&gc_global.n_allocators, gc_global.n_allocators + 1);
    if (gc_global.n_allocators > MAX_SIZES) {
        printf("MCAS gc max allocators exceeded, aborting\n");
        abort();
    }

    i = gc_global.nr_sizes;
    while ((ni = CASIO(&gc_global.nr_sizes, i, i + 1)) != i)
        i = ni;
    gc_global.blk_sizes[i] = alloc_size;
    gc_global.tags[i] = strdup(tag);
    gc_global.alloc_size[i] = ALLOC_CHUNKS_PER_LIST;
    gc_global.alloc[i] = get_filled_chunks(ALLOC_CHUNKS_PER_LIST, alloc_size);
    return i;
}
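
/*
 * The index handed back above is the alloc_id that callers later pass to
 * gc_alloc(), gc_free(), gc_get_blocksize() and gc_get_tag(); each
 * registered block size gets its own chunk lists and trace tag.
 */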

void gc_remove_allocator(int alloc_id)
{
    /* This is a no-op for now. */
}

int gc_add_hook(hook_fn_t fn)
{
    int ni, i = gc_global.nr_hooks;
    while ( (ni = CASIO(&gc_global.nr_hooks, i, i+1)) != i ) i = ni;
    gc_global.hook_fns[i] = fn;
    return i;
}

void gc_remove_hook(int hook_id)
{
    /* This is a no-op for now. */
}

void _destroy_gc_subsystem(void)
{
    printf("Total heap: %u bytes (%.2fMB) in %u allocations\n",
           gc_global.total_size, (double)gc_global.total_size / 1000000,
           gc_global.allocations);
}

void _init_gc_subsystem(void)
{
    memset(&gc_global, 0, sizeof(gc_global));

    gc_global.page_size   = (unsigned int)sysconf(_SC_PAGESIZE);
    gc_global.free_chunks = alloc_more_chunks();

    gc_global.nr_hooks = 0;
    gc_global.nr_sizes = 0;
}
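
/*
 * _init_gc_subsystem() must run before allocators are registered or threads
 * call gc_enter(): it zeroes the global state and builds the initial pool of
 * free chunks that get_empty_chunks() and the per-size lists draw from.
 */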