/******************************************************************************
 * gc.c
 *
 * A fully recycling epoch-based garbage collector. Works by counting
 * threads in and out of critical regions, to work out when
 * garbage queues can be fully deleted.
 *
 * Copyright (c) 2001-2003, K A Fraser
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.  Neither the name of the Keir Fraser
 *   nor the names of its contributors may be used to endorse or
 *   promote products derived from this software without specific
 *   prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <afsconfig.h>
#include <afs/param.h>

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#include "portable_defns.h"
#include "gc.h"

#include <afs/afsutil.h>

/*#define MINIMAL_GC*/
/*#define YIELD_TO_HELP_PROGRESS*/
#define PROFILE_GC

/* Recycled nodes are filled with this value if WEAK_MEM_ORDER. */
#define INVALID_BYTE 0
#define INITIALISE_NODES(_p,_c) memset((_p), INVALID_BYTE, (_c))

/*
 * Number of unique block sizes we can deal with. Equivalently, the
 * number of unique object caches which can be created.
 */
#define MAX_SIZES 30

#define MAX_HOOKS 4

/*
 * The initial number of allocation chunks for each per-blocksize list.
 * Popular allocation lists will steadily increase the allocation unit
 * in line with demand.
 */
#define ALLOC_CHUNKS_PER_LIST 10

/*
 * How many times should a thread call gc_enter(), seeing the same epoch
 * each time, before it makes a reclaim attempt?
 */
#define ENTRIES_PER_RECLAIM_ATTEMPT 100

/*
 *  0: current epoch -- threads are moving to this;
 * -1: some threads may still throw garbage into this epoch;
 * -2: no threads can see this epoch => we can zero garbage lists;
 * -3: all threads see zeros in these garbage lists => move to alloc lists.
 */
#ifdef WEAK_MEM_ORDER
#define NR_EPOCHS 4
#else
#define NR_EPOCHS 3
#endif

/*
 * A chunk amortises the cost of allocation from shared lists. It also
 * helps when zeroing nodes, as it increases per-cacheline pointer density
 * and means that node locations don't need to be brought into the cache
 * (most architectures have a non-temporal store instruction).
 */
#define BLKS_PER_CHUNK 100
typedef struct chunk_st chunk_t;
struct chunk_st
{
    chunk_t *next;              /* chunk chaining                 */
    unsigned int i;             /* the next entry in blk[] to use */
    void *blk[BLKS_PER_CHUNK];
};
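
/*
 * Illustrative sketch only (kept out of the build): the invariants the rest
 * of this file maintains for chunks.  Chunks are always linked into
 * singly-linked *circular* rings, and blk[0..i-1] hold the live entries:
 * gc_alloc() hands out blk[--i] and gc_free() stores into blk[i++], so a
 * freshly filled chunk has i == BLKS_PER_CHUNK and a drained one has i == 0.
 * The helper name below is hypothetical.
 */
#if 0
static int count_blocks_in_ring(chunk_t *ring)
{
    chunk_t *p = ring;
    int      n = 0;

    do { n += p->i; } while ( (p = p->next) != ring );
    return n;
}
#endif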
static struct gc_global_st
{
    CACHE_PAD(0);

    /* The current epoch. */
    VOLATILE unsigned int current;
    CACHE_PAD(1);

    /* Exclusive access to gc_reclaim(). */
    VOLATILE unsigned int inreclaim;
    CACHE_PAD(2);

    /* Allocator caches currently defined. */
    long n_allocators;

    /*
     * RUN-TIME CONSTANTS (to first approximation)
     */

    /* Memory page size, in bytes. */
    unsigned int page_size;

    /* Node sizes (run-time constants). */
    int blk_sizes[MAX_SIZES];
    int nr_sizes;

    /* Tags (trace support). */
    char *tags[MAX_SIZES];

    /* Registered epoch hooks. */
    int nr_hooks;
    hook_fn_t hook_fns[MAX_HOOKS];
    CACHE_PAD(3);

    /*
     * DATA WE MAY HIT HARD
     */

    /* Chain of free, empty chunks. */
    chunk_t * VOLATILE free_chunks;

    /* Main allocation lists. */
    chunk_t * VOLATILE alloc[MAX_SIZES];
    VOLATILE unsigned int alloc_size[MAX_SIZES];
#ifdef PROFILE_GC
    VOLATILE unsigned int total_size;
    VOLATILE unsigned int allocations;
#endif
} gc_global;


/* Per-thread state. */
struct gc_st
{
    /* Epoch that this thread sees. */
    unsigned int epoch;

    /* Number of calls to gc_enter() since last gc_reclaim() attempt. */
    unsigned int entries_since_reclaim;

#ifdef YIELD_TO_HELP_PROGRESS
    /* Number of calls to gc_reclaim() since we last yielded. */
    unsigned int reclaim_attempts_since_yield;
#endif

    /* Used by gc_async_barrier(). */
    void *async_page;
    int   async_page_state;

    /* Garbage lists. */
    chunk_t *garbage[NR_EPOCHS][MAX_SIZES];
    chunk_t *garbage_tail[NR_EPOCHS][MAX_SIZES];
    chunk_t *chunk_cache;

    /* Local allocation lists. */
    chunk_t *alloc[MAX_SIZES];
    unsigned int alloc_chunks[MAX_SIZES];

    /* Hook pointer lists. */
    chunk_t *hook[NR_EPOCHS][MAX_HOOKS];
};

/* XXX generalize */
#ifndef KERNEL
#define MEM_FAIL(_s)                                                    \
do {                                                                    \
    fprintf(stderr, "OUT OF MEMORY: %lu bytes at line %d\n",            \
            (unsigned long)(_s), __LINE__);                             \
    abort();                                                            \
} while ( 0 )
#endif

/* Allocate more empty chunks from the heap. */
#define CHUNKS_PER_ALLOC 1000
static chunk_t *alloc_more_chunks(void)
{
    int i;
    chunk_t *h, *p;

    ViceLog(11, ("GC: alloc_more_chunks alloc %d chunks\n",
                 CHUNKS_PER_ALLOC));

    h = p = ALIGNED_ALLOC(CHUNKS_PER_ALLOC * sizeof(*h));
    if ( h == NULL ) MEM_FAIL(CHUNKS_PER_ALLOC * sizeof(*h));

    for ( i = 1; i < CHUNKS_PER_ALLOC; i++ )
    {
        p->next = p + 1;
        p++;
    }

    p->next = h;

    return(h);
}


/* Put a chain of chunks onto a list. */
static void add_chunks_to_list(chunk_t *ch, chunk_t *head)
{
    chunk_t *h_next, *new_h_next, *ch_next;
    ch_next    = ch->next;
    new_h_next = head->next;
    do { ch->next = h_next = new_h_next; WMB_NEAR_CAS(); }
    while ( (new_h_next = CASPO(&head->next, h_next, ch_next)) != h_next );
}


/* Allocate a chain of @n empty chunks. Pointers may be garbage. */
static chunk_t *get_empty_chunks(int n)
{
    int i;
    chunk_t *new_rh, *rh, *rt, *head;

 retry:
    head   = gc_global.free_chunks;
    new_rh = head->next;
    do {
        rh = new_rh;
        rt = head;
        WEAK_DEP_ORDER_RMB();
        for ( i = 0; i < n; i++ )
        {
            if ( (rt = rt->next) == head )
            {
                /* Allocate some more chunks. */
                add_chunks_to_list(alloc_more_chunks(), head);
                goto retry;
            }
        }
    }
    while ( (new_rh = CASPO(&head->next, rh, rt->next)) != rh );

    rt->next = rh;
    return(rh);
}
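
/*
 * Illustrative sketch only (kept out of the build): the pointer surgery that
 * add_chunks_to_list() performs, written single-threaded with plain
 * assignments.  The nodes a, b, x and the helper name are hypothetical.
 * The real function does the same splice, but publishes ch->next before the
 * CASPO and retries with the freshly observed head->next whenever another
 * thread has changed it concurrently.
 */
#if 0
static void splice_example(void)
{
    chunk_t a, b, x, head;
    chunk_t *ch = &a, *ch_next, *h_next;

    head.next = &x;  x.next = &head;     /* target ring: head -> x -> head */
    a.next    = &b;  b.next = &a;        /* donor ring:  a -> b -> a       */

    ch_next   = ch->next;                /* rest of the donor ring (&b)      */
    h_next    = head.next;               /* current successor of head (&x)   */
    ch->next  = h_next;                  /* donor ring now flows into target */
    head.next = ch_next;                 /* target now leads into donor ring */

    /* Result: head -> b -> a -> x -> head */
    assert(head.next == &b && b.next == &a && a.next == &x && x.next == &head);
}
#endif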
/* Get @n filled chunks, pointing at blocks of @sz bytes each. */
static chunk_t *get_filled_chunks(int n, int sz)
{
    chunk_t *h, *p;
    char *node;
    int i;

#ifdef PROFILE_GC
    ADD_TO(gc_global.total_size, n * BLKS_PER_CHUNK * sz);
    ADD_TO(gc_global.allocations, 1);
#endif

    node = ALIGNED_ALLOC(n * BLKS_PER_CHUNK * sz);
    if ( node == NULL ) MEM_FAIL(n * BLKS_PER_CHUNK * sz);
#ifdef WEAK_MEM_ORDER
    INITIALISE_NODES(node, n * BLKS_PER_CHUNK * sz);
#endif

    h = p = get_empty_chunks(n);
    do {
        p->i = BLKS_PER_CHUNK;
        for ( i = 0; i < BLKS_PER_CHUNK; i++ )
        {
            p->blk[i] = node;
            node += sz;
        }
    }
    while ( (p = p->next) != h );

    return(h);
}


/*
 * gc_async_barrier: Cause an asynchronous barrier in all other threads. We do
 * this by causing a TLB shootdown to be propagated to all other processors.
 * Each time such an action is required, this function calls:
 *   mprotect(async_page, <page size>, <new protection>)
 * Each thread's state contains a memory page dedicated for this purpose.
 */
#ifdef WEAK_MEM_ORDER
static void gc_async_barrier(gc_t *gc)
{
    mprotect(gc->async_page, gc_global.page_size,
             gc->async_page_state ? PROT_READ : PROT_NONE);
    gc->async_page_state = !gc->async_page_state;
}
#else
#define gc_async_barrier(_g) ((void)0)
#endif


/* Grab a level @i allocation chunk from main chain. */
static chunk_t *get_alloc_chunk(gc_t *gc, int i)
{
    chunk_t *alloc, *p, *new_p, *nh;
    unsigned int sz;

    alloc = gc_global.alloc[i];
    new_p = alloc->next;

    do {
        p = new_p;
        while ( p == alloc )
        {
            sz = gc_global.alloc_size[i];
            nh = get_filled_chunks(sz, gc_global.blk_sizes[i]);
            ADD_TO(gc_global.alloc_size[i], sz >> 3);
            gc_async_barrier(gc);
            add_chunks_to_list(nh, alloc);
            p = alloc->next;
        }
        WEAK_DEP_ORDER_RMB();
    }
    while ( (new_p = CASPO(&alloc->next, p, p->next)) != p );

    p->next = p;

    assert(p->i == BLKS_PER_CHUNK);

    return(p);
}
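
/*
 * Worked example of the epoch index arithmetic used by gc_reclaim() below
 * (illustrative only).  With the default NR_EPOCHS == 3 and curr_epoch == 0:
 *
 *   two_ago   = (0 + 2) % 3 == 2
 *   three_ago = (0 + 1) % 3 == 1
 *
 * The three_ago lists are the oldest and are moved onto the allocation
 * lists; the two_ago lists are additionally scrubbed only when
 * WEAK_MEM_ORDER is defined (in which case NR_EPOCHS is 4).  Note that
 * three_ago is exactly the index the epoch counter advances to at the end
 * of a successful reclaim, so those lists must be emptied before the epoch
 * wraps back onto them.
 */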
#ifndef MINIMAL_GC
/*
 * gc_reclaim: Scans the list of struct gc_perthread looking for the lowest
 * maximum epoch number seen by a thread that's in the list code. If it's the
 * current epoch, the "nearly-free" lists from the previous epoch are
 * reclaimed, and the epoch is incremented.
 */
static void gc_reclaim(void)
{
    ptst_t       *ptst, *first_ptst, *our_ptst = NULL;
    gc_t         *gc = NULL;
    unsigned long curr_epoch;
    chunk_t      *ch, *t;
    int           two_ago, three_ago, i, j;

    ViceLog(11, ("GC: gc_reclaim enter\n"));

    /* Barrier to entering the reclaim critical section. */
    if ( gc_global.inreclaim || CASIO(&gc_global.inreclaim, 0, 1) ) return;

    ViceLog(11, ("GC: gc_reclaim after inreclaim barrier\n"));

    /*
     * Grab first ptst structure *before* barrier -- prevent bugs
     * on weak-ordered architectures.
     */
    first_ptst = ptst_first();
    MB();
    curr_epoch = gc_global.current;

    /* Have all threads seen the current epoch, or not in mutator code? */
    for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) )
    {
        if ( (ptst->count > 1) && (ptst->gc->epoch != curr_epoch) ) goto out;
    }

    ViceLog(11, ("GC: gc_reclaim all-threads see current epoch\n"));

    /*
     * Three-epoch-old garbage lists move to allocation lists.
     * Two-epoch-old garbage lists are cleaned out.
     */
    two_ago   = (curr_epoch+2) % NR_EPOCHS;
    three_ago = (curr_epoch+1) % NR_EPOCHS;
    if ( gc_global.nr_hooks != 0 )
        our_ptst = (ptst_t *)pthread_getspecific(ptst_key);
    for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) )
    {
        gc = ptst->gc;

        for ( i = 0; i < gc_global.nr_sizes; i++ )
        {
#ifdef WEAK_MEM_ORDER
            int sz = gc_global.blk_sizes[i];
            if ( gc->garbage[two_ago][i] != NULL )
            {
                chunk_t *head = gc->garbage[two_ago][i];
                ch = head;
                do {
                    int j;
                    for ( j = 0; j < ch->i; j++ )
                        INITIALISE_NODES(ch->blk[j], sz);
                }
                while ( (ch = ch->next) != head );
            }
#endif

            /* NB. Leave one chunk behind, as it is probably not yet full. */
            t = gc->garbage[three_ago][i];
            if ( (t == NULL) || ((ch = t->next) == t) ) continue;
            gc->garbage_tail[three_ago][i]->next = ch;
            gc->garbage_tail[three_ago][i] = t;
            t->next = t;

            /* gc inst: compute and log size of returned list */
            {
                chunk_t *ch_head, *ch_next;
                int r_ix, r_len, r_size;
                r_ix = 0;
                r_len = 0;
                r_size = 0;

                /* XXX: nonfatal, may be missing multiplier */
                ch_head = ch;
                ch_next = ch;
                do {
                    r_len++;
                } while ( (ch_next = ch_next->next) != NULL
                          && (ch_next != ch_head) );

                ViceLog(11, ("GC: return %d chunks of size %d to "
                             "gc_global.alloc[%d]\n",
                             r_len, gc_global.blk_sizes[i], i));
            }

            add_chunks_to_list(ch, gc_global.alloc[i]);
        }

        for ( i = 0; i < gc_global.nr_hooks; i++ )
        {
            hook_fn_t fn = gc_global.hook_fns[i];
            ch = gc->hook[three_ago][i];
            if ( ch == NULL ) continue;
            gc->hook[three_ago][i] = NULL;

            t = ch;
            do {
                for ( j = 0; j < t->i; j++ )
                    fn(our_ptst, t->blk[j]);
            }
            while ( (t = t->next) != ch );

            /* gc inst: compute and log size of returned list */
            {
                chunk_t *ch_head, *ch_next;
                int r_ix, r_len, r_size;
                r_ix = 0;
                r_len = 0;

                /* XXX: nonfatal, may be missing multiplier */
                ch_head = ch;
                ch_next = ch;
                do {
                    r_len++;
                } while ( (ch_next = ch_next->next) != NULL
                          && (ch_next != ch_head) );

                ViceLog(11, ("GC: return %d chunks to gc_global.free_chunks\n",
                             r_len));
            }

            add_chunks_to_list(ch, gc_global.free_chunks);
        }
    }

    /* Update current epoch. */
    ViceLog(11, ("GC: gc_reclaim epoch transition (leaving %lu)\n",
                 curr_epoch));

    WMB();
    gc_global.current = (curr_epoch+1) % NR_EPOCHS;

 out:
    gc_global.inreclaim = 0;
}
#endif /* MINIMAL_GC */


void *gc_alloc(ptst_t *ptst, int alloc_id)
{
    gc_t *gc = ptst->gc;
    chunk_t *ch;

    ch = gc->alloc[alloc_id];
    if ( ch->i == 0 )
    {
        if ( gc->alloc_chunks[alloc_id]++ == 100 )
        {
            gc->alloc_chunks[alloc_id] = 0;
            add_chunks_to_list(ch, gc_global.free_chunks);
            gc->alloc[alloc_id] = ch = get_alloc_chunk(gc, alloc_id);
        }
        else
        {
            chunk_t *och = ch;
            ch = get_alloc_chunk(gc, alloc_id);
            ch->next  = och->next;
            och->next = ch;
            gc->alloc[alloc_id] = ch;
        }
    }

    return ch->blk[--ch->i];
}

int
gc_get_blocksize(int alloc_id)
{
    return (gc_global.blk_sizes[alloc_id]);
}

char *
gc_get_tag(int alloc_id)
{
    return (gc_global.tags[alloc_id]);
}

static chunk_t *chunk_from_cache(gc_t *gc)
{
    chunk_t *ch = gc->chunk_cache, *p = ch->next;

    if ( ch == p )
    {
        gc->chunk_cache = get_empty_chunks(100);
    }
    else
    {
        ch->next = p->next;
        p->next  = p;
    }

    p->i = 0;
    return(p);
}

void gc_free(ptst_t *ptst, void *p, int alloc_id)
{
#ifndef MINIMAL_GC
    gc_t *gc = ptst->gc;
    chunk_t *prev, *new, *ch = gc->garbage[gc->epoch][alloc_id];

    if ( ch == NULL )
    {
        gc->garbage[gc->epoch][alloc_id] = ch = chunk_from_cache(gc);
        gc->garbage_tail[gc->epoch][alloc_id] = ch;
    }
    else if ( ch->i == BLKS_PER_CHUNK )
    {
        prev = gc->garbage_tail[gc->epoch][alloc_id];
        new  = chunk_from_cache(gc);
        gc->garbage[gc->epoch][alloc_id] = new;
        new->next  = ch;
        prev->next = new;
        ch = new;
    }

    ch->blk[ch->i++] = p;
#endif
}

void gc_add_ptr_to_hook_list(ptst_t *ptst, void *ptr, int hook_id)
{
    gc_t *gc = ptst->gc;
    chunk_t *och, *ch = gc->hook[gc->epoch][hook_id];

    if ( ch == NULL )
    {
        gc->hook[gc->epoch][hook_id] = ch = chunk_from_cache(gc);
    }
    else
    {
        ch = ch->next;
        if ( ch->i == BLKS_PER_CHUNK )
        {
            och       = gc->hook[gc->epoch][hook_id];
            ch        = chunk_from_cache(gc);
            ch->next  = och->next;
            och->next = ch;
        }
    }

    ch->blk[ch->i++] = ptr;
}

void gc_unsafe_free(ptst_t *ptst, void *p, int alloc_id)
{
    gc_t *gc = ptst->gc;
    chunk_t *ch;

    ch = gc->alloc[alloc_id];
    if ( ch->i < BLKS_PER_CHUNK )
    {
        ch->blk[ch->i++] = p;
    }
    else
    {
        gc_free(ptst, p, alloc_id);
    }
}
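
/*
 * Illustrative sketch only (kept out of the build).  "my_alloc_id",
 * "try_insert" and "try_unlink" are hypothetical stand-ins for caller code.
 * The point is the choice of free routine: a block that was never published
 * to other threads can go straight back via gc_unsafe_free(), whereas a
 * block that concurrent readers may still hold must go through gc_free() so
 * that reuse is deferred until the epochs have turned over.
 */
#if 0
extern int   try_insert(void *node);     /* hypothetical */
extern void *try_unlink(void);           /* hypothetical */

static void retire_example(ptst_t *ptst, int my_alloc_id)
{
    void *n = gc_alloc(ptst, my_alloc_id);
    void *victim;

    if ( !try_insert(n) )
        gc_unsafe_free(ptst, n, my_alloc_id);  /* never visible: reuse now   */

    if ( (victim = try_unlink()) != NULL )
        gc_free(ptst, victim, my_alloc_id);    /* maybe visible: defer reuse */
}
#endif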
void gc_enter(ptst_t *ptst)
{
#ifdef MINIMAL_GC
    ptst->count++;
    MB();
#else
    gc_t *gc = ptst->gc;
    int new_epoch, cnt;

 retry:
    cnt = ptst->count++;
    MB();
    if ( cnt == 1 )
    {
        new_epoch = gc_global.current;
        if ( gc->epoch != new_epoch )
        {
            gc->epoch = new_epoch;
            gc->entries_since_reclaim        = 0;
#ifdef YIELD_TO_HELP_PROGRESS
            gc->reclaim_attempts_since_yield = 0;
#endif
        }
        else if ( gc->entries_since_reclaim++ == 100 )
        {
            ptst->count--;
#ifdef YIELD_TO_HELP_PROGRESS
            if ( gc->reclaim_attempts_since_yield++ == 10000 )
            {
                gc->reclaim_attempts_since_yield = 0;
                sched_yield();
            }
#endif
            gc->entries_since_reclaim = 0;
            gc_reclaim();
            goto retry;
        }
    }
#endif
}

void gc_exit(ptst_t *ptst)
{
    MB();
    ptst->count--;
}

gc_t *gc_init(void)
{
    gc_t *gc;
    int   i;

    gc = ALIGNED_ALLOC(sizeof(*gc));
    if ( gc == NULL ) MEM_FAIL(sizeof(*gc));
    memset(gc, 0, sizeof(*gc));

#ifdef WEAK_MEM_ORDER
    /* Initialise shootdown state. */
    gc->async_page = mmap(NULL, gc_global.page_size, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if ( gc->async_page == (void *)MAP_FAILED ) MEM_FAIL(gc_global.page_size);
    gc->async_page_state = 1;
#endif

    gc->chunk_cache = get_empty_chunks(100);

    /* Get ourselves a set of allocation chunks. */
    for ( i = 0; i < gc_global.nr_sizes; i++ )
    {
        gc->alloc[i] = get_alloc_chunk(gc, i);
    }
    for ( ; i < MAX_SIZES; i++ )
    {
        gc->alloc[i] = chunk_from_cache(gc);
    }

    return(gc);
}

int
gc_add_allocator(int alloc_size, char *tag)
{
    int ni, i;

    RMB();
    FASPO(&gc_global.n_allocators, gc_global.n_allocators + 1);
    if (gc_global.n_allocators > MAX_SIZES) {
        /* critical error */
#if !defined(KERNEL)
        printf("MCAS gc max allocators exceeded, aborting\n");
#endif
        abort();
    }

    i = gc_global.nr_sizes;
    while ((ni = CASIO(&gc_global.nr_sizes, i, i + 1)) != i)
        i = ni;
    gc_global.blk_sizes[i]  = alloc_size;
    gc_global.tags[i]       = strdup(tag);
    gc_global.alloc_size[i] = ALLOC_CHUNKS_PER_LIST;
    gc_global.alloc[i]      = get_filled_chunks(ALLOC_CHUNKS_PER_LIST,
                                                alloc_size);
    return i;
}

void
gc_remove_allocator(int alloc_id)
{
    /* This is a no-op for now. */
}

int gc_add_hook(hook_fn_t fn)
{
    int ni, i = gc_global.nr_hooks;
    while ( (ni = CASIO(&gc_global.nr_hooks, i, i+1)) != i ) i = ni;
    gc_global.hook_fns[i] = fn;
    return i;
}

void gc_remove_hook(int hook_id)
{
    /* This is a no-op for now. */
}

void _destroy_gc_subsystem(void)
{
#ifdef PROFILE_GC
    printf("Total heap: %u bytes (%.2fMB) in %u allocations\n",
           gc_global.total_size, (double)gc_global.total_size / 1000000,
           gc_global.allocations);
#endif
}

void _init_gc_subsystem(void)
{
    memset(&gc_global, 0, sizeof(gc_global));

    gc_global.page_size   = (unsigned int)sysconf(_SC_PAGESIZE);
    gc_global.free_chunks = alloc_more_chunks();
    gc_global.nr_hooks    = 0;
    gc_global.nr_sizes    = 0;
}
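
/*
 * Illustrative start-up order only (kept out of the build).  "my_cache", the
 * 64-byte block size and the function names are hypothetical, and ptst_t
 * management belongs to the per-thread-state code outside this file -- it is
 * simply assumed here that each thread's ptst->gc ends up pointing at the
 * gc_t returned by gc_init(), and that gc_enter()/gc_exit() bracket every
 * access to the shared structure (directly or via a wrapper).
 */
#if 0
static int my_cache;                               /* hypothetical id */

static void example_startup(void)
{
    _init_gc_subsystem();                          /* once, before any thread */
    my_cache = gc_add_allocator(64, "my_nodes");   /* one cache per block size */
}

static void example_operation(ptst_t *ptst)
{
    void *n;

    gc_enter(ptst);                      /* bracket the critical region */
    n = gc_alloc(ptst, my_cache);
    /* ... publish n into a shared structure; retire nodes with gc_free() ... */
    gc_exit(ptst);                       /* lets the epoch advance past us */
}
#endif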