/******************************************************************************
 * A fully recycling epoch-based garbage collector. Works by counting
 * threads in and out of critical regions, to determine when
 * garbage queues can be fully deleted.
 *
 * Copyright (c) 2001-2003, K A Fraser
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the following
      disclaimer in the documentation and/or other materials provided
      with the distribution.  Neither the name of the Keir Fraser
      nor the names of its contributors may be used to endorse or
      promote products derived from this software without specific
      prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "portable_defns.h"
#include <afs/afsutil.h>

/*#define MINIMAL_GC*/
/*#define YIELD_TO_HELP_PROGRESS*/
/* Recycled nodes are filled with this value if WEAK_MEM_ORDER. */
#define INVALID_BYTE 0
#define INITIALISE_NODES(_p,_c) memset((_p), INVALID_BYTE, (_c));
/* Number of unique block sizes we can deal with. Equivalently, the
 * number of unique object caches which can be created. */

/*
 * The initial number of allocation chunks for each per-blocksize list.
 * Popular allocation lists will steadily increase the allocation unit
 * in line with demand.
 */
#define ALLOC_CHUNKS_PER_LIST 10

/*
 * How many times should a thread call gc_enter(), seeing the same epoch
 * each time, before it makes a reclaim attempt?
 */
#define ENTRIES_PER_RECLAIM_ATTEMPT 100
/*
 *  0: current epoch -- threads are moving to this;
 * -1: some threads may still throw garbage into this epoch;
 * -2: no threads can see this epoch => we can zero garbage lists;
 * -3: all threads see zeros in these garbage lists => move to alloc lists.
 */
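
/*
 * Index arithmetic sketch (assuming NR_EPOCHS covers the four states listed
 * above, i.e. NR_EPOCHS == 4): modulo 4, adding 2 is the same as subtracting
 * 2, and adding 1 is the same as subtracting 3.  This is why gc_reclaim()
 * below computes
 *     two_ago   = (curr_epoch + 2) % NR_EPOCHS;
 *     three_ago = (curr_epoch + 1) % NR_EPOCHS;
 * e.g. with curr_epoch == 1, two_ago == 3 and three_ago == 2.
 */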
/*
 * A chunk amortises the cost of allocation from shared lists. It also
 * helps when zeroing nodes, as it increases per-cacheline pointer density
 * and means that node locations don't need to be brought into the cache
 * (most architectures have a non-temporal store instruction).
 */
#define BLKS_PER_CHUNK 100
typedef struct chunk_st chunk_t;
struct chunk_st
{
    chunk_t *next;              /* chunk chaining                 */
    unsigned int i;             /* the next entry in blk[] to use */
    void *blk[BLKS_PER_CHUNK];
};
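
/*
 * Usage sketch (mirrors gc_alloc() and gc_free() below): a chunk acts as a
 * small stack of block pointers, popped on allocation and pushed on free:
 *
 *     void *b = ch->blk[--ch->i];              pop a block for allocation
 *     ch->blk[ch->i++] = p;                    push a retired block
 *     if ( ch->i == BLKS_PER_CHUNK ) ...       chunk full: chain a fresh one
 */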
static struct gc_global_st
{
    /* The current epoch. */
    VOLATILE unsigned int current;

    /* Exclusive access to gc_reclaim(). */
    VOLATILE unsigned int inreclaim;

    /* Allocator caches currently defined. */
    long n_allocators;

    /*
     * RUN-TIME CONSTANTS (to first approximation)
     */

    /* Memory page size, in bytes. */
    unsigned int page_size;

    /* Node sizes (run-time constants). */
    int nr_sizes;
    int blk_sizes[MAX_SIZES];

    /* Registered epoch hooks. */
    int nr_hooks;
    hook_fn_t hook_fns[MAX_HOOKS];

    /*
     * DATA WE MAY HIT HARD
     */

    /* Chain of free, empty chunks. */
    chunk_t * VOLATILE free_chunks;

    /* Main allocation lists. */
    chunk_t * VOLATILE alloc[MAX_SIZES];
    VOLATILE unsigned int alloc_size[MAX_SIZES];

    VOLATILE unsigned int total_size;
    VOLATILE unsigned int allocations;
} gc_global;
/* Per-thread state. */
struct gc_st
{
    /* Epoch that this thread sees. */
    unsigned int epoch;

    /* Number of calls to gc_enter() since last gc_reclaim() attempt. */
    unsigned int entries_since_reclaim;

#ifdef YIELD_TO_HELP_PROGRESS
    /* Number of calls to gc_reclaim() since we last yielded. */
    unsigned int reclaim_attempts_since_yield;
#endif

    /* Used by gc_async_barrier(). */
    void *async_page;
    int async_page_state;

    chunk_t *garbage[NR_EPOCHS][MAX_SIZES];
    chunk_t *garbage_tail[NR_EPOCHS][MAX_SIZES];
    chunk_t *chunk_cache;

    /* Local allocation lists. */
    chunk_t *alloc[MAX_SIZES];
    unsigned int alloc_chunks[MAX_SIZES];

    /* Hook pointer lists. */
    chunk_t *hook[NR_EPOCHS][MAX_HOOKS];
};
#define MEM_FAIL(_s) do {                                                     \
    fprintf(stderr, "OUT OF MEMORY: %d bytes at line %d\n", (_s), __LINE__);  \
    abort();                                                                  \
} while ( 0 )
/* Allocate more empty chunks from the heap. */
#define CHUNKS_PER_ALLOC 1000
static chunk_t *alloc_more_chunks(void)
    h = p = ALIGNED_ALLOC(CHUNKS_PER_ALLOC * sizeof(*h));
    if ( h == NULL ) MEM_FAIL(CHUNKS_PER_ALLOC * sizeof(*h));

    for ( i = 1; i < CHUNKS_PER_ALLOC; i++ )
/* Put a chain of chunks onto a list. */
static void add_chunks_to_list(chunk_t *ch, chunk_t *head)
{
    chunk_t *h_next, *new_h_next, *ch_next;

    ch_next    = ch->next;
    new_h_next = head->next;
    do { ch->next = h_next = new_h_next; WMB_NEAR_CAS(); }
    while ( (new_h_next = CASPO(&head->next, h_next, ch_next)) != h_next );
}
/* Allocate a chain of @n empty chunks. Pointers may be garbage. */
static chunk_t *get_empty_chunks(int n)
    chunk_t *new_rh, *rh, *rt, *head;

    head = gc_global.free_chunks;

        WEAK_DEP_ORDER_RMB();
        for ( i = 0; i < n; i++ )
            if ( (rt = rt->next) == head )
                /* Allocate some more chunks. */
                add_chunks_to_list(alloc_more_chunks(), head);
    while ( (new_rh = CASPO(&head->next, rh, rt->next)) != rh );
/* Get @n filled chunks, pointing at blocks of @sz bytes each. */
static chunk_t *get_filled_chunks(int n, int sz)
    ADD_TO(gc_global.total_size, n * BLKS_PER_CHUNK * sz);
    ADD_TO(gc_global.allocations, 1);

    node = ALIGNED_ALLOC(n * BLKS_PER_CHUNK * sz);
    if ( node == NULL ) MEM_FAIL(n * BLKS_PER_CHUNK * sz);
#ifdef WEAK_MEM_ORDER
    INITIALISE_NODES(node, n * BLKS_PER_CHUNK * sz);
#endif

    h = p = get_empty_chunks(n);
        p->i = BLKS_PER_CHUNK;
        for ( i = 0; i < BLKS_PER_CHUNK; i++ )
    while ( (p = p->next) != h );
/*
 * gc_async_barrier: Cause an asynchronous barrier in all other threads. We do
 * this by causing a TLB shootdown to be propagated to all other processors.
 * Each time such an action is required, this function calls:
 *   mprotect(async_page, <page size>, <new flags>)
 * Each thread's state contains a memory page dedicated for this purpose.
 */
#ifdef WEAK_MEM_ORDER
static void gc_async_barrier(gc_t *gc)
{
    mprotect(gc->async_page, gc_global.page_size,
             gc->async_page_state ? PROT_READ : PROT_NONE);
    gc->async_page_state = !gc->async_page_state;
}
#else
#define gc_async_barrier(_g) ((void)0)
#endif
/* Grab a level @i allocation chunk from main chain. */
static chunk_t *get_alloc_chunk(gc_t *gc, int i)
    chunk_t *alloc, *p, *new_p, *nh;

    alloc = gc_global.alloc[i];

            sz = gc_global.alloc_size[i];
            nh = get_filled_chunks(sz, gc_global.blk_sizes[i]);
            ADD_TO(gc_global.alloc_size[i], sz >> 3);
            gc_async_barrier(gc);
            add_chunks_to_list(nh, alloc);
        WEAK_DEP_ORDER_RMB();
    while ( (new_p = CASPO(&alloc->next, p, p->next)) != p );

    assert(p->i == BLKS_PER_CHUNK);
#ifndef MINIMAL_GC
/*
 * gc_reclaim: Scans the list of per-thread state structures (ptst_t),
 * looking for the lowest maximum epoch number seen by a thread that's in the
 * list code. If it's the current epoch, the "nearly-free" lists from the
 * previous epoch are reclaimed, and the epoch is incremented.
 */
static void gc_reclaim(void)
{
    ptst_t *ptst, *first_ptst, *our_ptst = NULL;
    gc_t *gc;
    unsigned long curr_epoch;
    chunk_t *ch, *t;
    int two_ago, three_ago, i, j;

    /* Barrier to entering the reclaim critical section. */
    if ( gc_global.inreclaim || CASIO(&gc_global.inreclaim, 0, 1) ) return;
    /*
     * Grab first ptst structure *before* barrier -- prevent bugs
     * on weak-ordered architectures.
     */
    first_ptst = ptst_first();

    curr_epoch = gc_global.current;

    /* Have all threads seen the current epoch, or not in mutator code? */
    for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) )
        if ( (ptst->count > 1) && (ptst->gc->epoch != curr_epoch) ) goto out;
    /*
     * Three-epoch-old garbage lists move to allocation lists.
     * Two-epoch-old garbage lists are cleaned out.
     */
    two_ago   = (curr_epoch+2) % NR_EPOCHS;
    three_ago = (curr_epoch+1) % NR_EPOCHS;
    if ( gc_global.nr_hooks != 0 )
        our_ptst = (ptst_t *)pthread_getspecific(ptst_key);
    for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) )
    {
        gc = ptst->gc;

        for ( i = 0; i < gc_global.nr_sizes; i++ )
        {
#ifdef WEAK_MEM_ORDER
            int sz = gc_global.blk_sizes[i];
            if ( gc->garbage[two_ago][i] != NULL )
            {
                chunk_t *head = gc->garbage[two_ago][i];
                ch = head;
                do {
                    for ( j = 0; j < ch->i; j++ )
                        INITIALISE_NODES(ch->blk[j], sz);
                }
                while ( (ch = ch->next) != head );
            }
#endif

            /* NB. Leave one chunk behind, as it is probably not yet full. */
            t = gc->garbage[three_ago][i];
            if ( (t == NULL) || ((ch = t->next) == t) ) continue;
            gc->garbage_tail[three_ago][i]->next = ch;
            gc->garbage_tail[three_ago][i] = t;
            t->next = t;
            add_chunks_to_list(ch, gc_global.alloc[i]);
        }
        for ( i = 0; i < gc_global.nr_hooks; i++ )
        {
            hook_fn_t fn = gc_global.hook_fns[i];
            ch = gc->hook[three_ago][i];
            if ( ch == NULL ) continue;
            gc->hook[three_ago][i] = NULL;

            t = ch;
            do { for ( j = 0; j < t->i; j++ ) fn(our_ptst, t->blk[j]); }
            while ( (t = t->next) != ch );

            add_chunks_to_list(ch, gc_global.free_chunks);
        }
    }

    /* Update current epoch. */
    gc_global.current = (curr_epoch+1) % NR_EPOCHS;

 out:
    gc_global.inreclaim = 0;
}
#endif /* MINIMAL_GC */
void *gc_alloc(ptst_t *ptst, int alloc_id)
    ch = gc->alloc[alloc_id];

        if ( gc->alloc_chunks[alloc_id]++ == 100 )
            gc->alloc_chunks[alloc_id] = 0;
            add_chunks_to_list(ch, gc_global.free_chunks);
            gc->alloc[alloc_id] = ch = get_alloc_chunk(gc, alloc_id);

            ch = get_alloc_chunk(gc, alloc_id);
            ch->next = och->next;
            gc->alloc[alloc_id] = ch;

    return ch->blk[--ch->i];
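
/*
 * Illustrative call sequence (hypothetical node type and allocator id; real
 * ids come from gc_add_allocator() below):
 *
 *     node_t *n = (node_t *)gc_alloc(ptst, node_alloc_id);
 *     ...
 *     gc_free(ptst, n, node_alloc_id);
 *
 * Note that gc_free() is deferred: the block is queued on the current
 * epoch's garbage list and is not reused until the epoch has cycled.
 */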
int
gc_get_blocksize(int alloc_id)
{
    return (gc_global.blk_sizes[alloc_id]);
}

static chunk_t *chunk_from_cache(gc_t *gc)
    chunk_t *ch = gc->chunk_cache, *p = ch->next;

        gc->chunk_cache = get_empty_chunks(100);
void gc_free(ptst_t *ptst, void *p, int alloc_id)
    chunk_t *prev, *new, *ch = gc->garbage[gc->epoch][alloc_id];

        gc->garbage[gc->epoch][alloc_id] = ch = chunk_from_cache(gc);
        gc->garbage_tail[gc->epoch][alloc_id] = ch;

    else if ( ch->i == BLKS_PER_CHUNK )
        prev = gc->garbage_tail[gc->epoch][alloc_id];
        new = chunk_from_cache(gc);
        gc->garbage[gc->epoch][alloc_id] = new;

    ch->blk[ch->i++] = p;
void gc_add_ptr_to_hook_list(ptst_t *ptst, void *ptr, int hook_id)
    chunk_t *och, *ch = gc->hook[gc->epoch][hook_id];

        gc->hook[gc->epoch][hook_id] = ch = chunk_from_cache(gc);

        if ( ch->i == BLKS_PER_CHUNK )
            och = gc->hook[gc->epoch][hook_id];
            ch = chunk_from_cache(gc);
            ch->next = och->next;

    ch->blk[ch->i++] = ptr;
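
/*
 * Hook sketch: gc_reclaim() invokes each registered hook as fn(ptst, ptr)
 * for every pointer queued here, once the pointer's epoch has retired.
 * Hypothetical example (names are illustrative only):
 *
 *     static void free_child_hook(ptst_t *ptst, void *obj)
 *     {
 *         gc_free(ptst, ((node_t *)obj)->child, node_alloc_id);
 *     }
 *     ...
 *     hook_id = gc_add_hook(free_child_hook);
 *     gc_add_ptr_to_hook_list(ptst, n, hook_id);
 */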
void gc_unsafe_free(ptst_t *ptst, void *p, int alloc_id)
{
    gc_t *gc = ptst->gc;
    chunk_t *ch;
    ch = gc->alloc[alloc_id];
    if ( ch->i < BLKS_PER_CHUNK )
        ch->blk[ch->i++] = p;
    else
        gc_free(ptst, p, alloc_id);
}
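
/*
 * Note: gc_unsafe_free() recycles the block straight onto the caller's local
 * allocation list, with no epoch delay.  It is therefore only safe when the
 * caller knows no other thread can still hold a reference to the block --
 * e.g. an object that was allocated but never published.  gc_free() above is
 * the deferred, always-safe path.
 */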
void gc_enter(ptst_t *ptst)
    new_epoch = gc_global.current;
    if ( gc->epoch != new_epoch )
    {
        gc->epoch = new_epoch;
        gc->entries_since_reclaim = 0;
#ifdef YIELD_TO_HELP_PROGRESS
        gc->reclaim_attempts_since_yield = 0;
#endif
    }
    else if ( gc->entries_since_reclaim++ == ENTRIES_PER_RECLAIM_ATTEMPT )
#ifdef YIELD_TO_HELP_PROGRESS
        if ( gc->reclaim_attempts_since_yield++ == 10000 )
            gc->reclaim_attempts_since_yield = 0;
#endif
        gc->entries_since_reclaim = 0;
void gc_exit(ptst_t *ptst)

gc_t *gc_init(void)
    gc = ALIGNED_ALLOC(sizeof(*gc));
    if ( gc == NULL ) MEM_FAIL(sizeof(*gc));
    memset(gc, 0, sizeof(*gc));

#ifdef WEAK_MEM_ORDER
    /* Initialise shootdown state. */
    gc->async_page = mmap(NULL, gc_global.page_size, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if ( gc->async_page == (void *)MAP_FAILED ) MEM_FAIL(gc_global.page_size);
    gc->async_page_state = 1;
#endif

    gc->chunk_cache = get_empty_chunks(100);

    /* Get ourselves a set of allocation chunks. */
    for ( i = 0; i < gc_global.nr_sizes; i++ )
        gc->alloc[i] = get_alloc_chunk(gc, i);
    for ( ; i < MAX_SIZES; i++ )
        gc->alloc[i] = chunk_from_cache(gc);
int
gc_add_allocator(int alloc_size)
{
    int ni, i;
    FASPO(&gc_global.n_allocators, gc_global.n_allocators + 1);
    if (gc_global.n_allocators > MAX_SIZES) {
        printf("MCAS gc max allocators exceeded, aborting\n");
        abort();
    }
    i = gc_global.nr_sizes;
    while ((ni = CASIO(&gc_global.nr_sizes, i, i + 1)) != i)
        i = ni;
    gc_global.blk_sizes[i] = alloc_size;
    gc_global.alloc_size[i] = ALLOC_CHUNKS_PER_LIST;
    gc_global.alloc[i] = get_filled_chunks(ALLOC_CHUNKS_PER_LIST, alloc_size);
    return i;
}
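
/*
 * Registration sketch (hypothetical object type): each distinct block size
 * gets its own allocator id, typically once at start-of-day after
 * _init_gc_subsystem():
 *
 *     my_alloc_id = gc_add_allocator(sizeof(struct my_obj));
 *
 * The id is then passed to gc_alloc()/gc_free() for blocks of that size.
 */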
void gc_remove_allocator(int alloc_id)
{
    /* This is a no-op for now. */
}
int gc_add_hook(hook_fn_t fn)
{
    int ni, i = gc_global.nr_hooks;
    while ( (ni = CASIO(&gc_global.nr_hooks, i, i+1)) != i ) i = ni;
    gc_global.hook_fns[i] = fn;
    return i;
}
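
/*
 * Note: the CASIO() loop above reserves a slot in hook_fns[] without a lock;
 * each racing caller retries with the updated count until its own increment
 * succeeds, so concurrent registrations receive distinct indices.
 */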
void gc_remove_hook(int hook_id)
{
    /* This is a no-op for now. */
}
void _destroy_gc_subsystem(void)
    printf("Total heap: %u bytes (%.2fMB) in %u allocations\n",
           gc_global.total_size, (double)gc_global.total_size / 1000000,
           gc_global.allocations);
void _init_gc_subsystem(void)
{
    memset(&gc_global, 0, sizeof(gc_global));

    gc_global.page_size   = (unsigned int)sysconf(_SC_PAGESIZE);
    gc_global.free_chunks = alloc_more_chunks();
    gc_global.nr_hooks    = 0;
    gc_global.nr_sizes    = 0;
}
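
/*
 * Putting it together -- a minimal lifecycle sketch (assuming the per-thread
 * state layer supplies the ptst_t, e.g. via critical_enter()/critical_exit(),
 * which bracket gc_enter()/gc_exit()):
 *
 *     _init_gc_subsystem();
 *     id = gc_add_allocator(sizeof(node_t));      once per object size
 *
 *     ptst = critical_enter();                    per operation, per thread
 *     n = gc_alloc(ptst, id);
 *     ...
 *     gc_free(ptst, old_n, id);
 *     critical_exit(ptst);
 *
 *     _destroy_gc_subsystem();
 */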