1 /******************************************************************************
4 * Obstruction-free software transactional memory (STM).
6 * For more information see:
7 * Software Transactional Memory for Dynamic-sized Data Structures
8 * Maurice Herlihy, Victor Luchangco, Mark Moir, and William Scherer III
9 * Proceedings of 2003 ACM Symposium on Principles of Distributed Computing
11 * Copyright (c) 2003, K A Fraser
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are
17 * Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials provided
22 * with the distribution. Neither the name of the Keir Fraser
23 * nor the names of its contributors may be used to endorse or
24 * promote products derived from this software without specific
25 * prior written permission.
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #include "portable_defns.h"
/*
 * NOTE(review): this chunk is an elided extract of the original file --
 * the fused line numbers at the start of each line show gaps, i.e. struct
 * bodies, braces and statements are missing between the visible lines.
 * Comments below describe only what is visible.
 */

/* Forward typedefs for the STM's core data structures. */
58 typedef struct stm_loc_st stm_loc;
59 typedef struct stm_blk_st stm_blk;
60 typedef struct stm_tx_entry_st stm_tx_entry;
61 typedef struct stm_tx_st stm_tx;
62 typedef struct stm_st stm;

/* Fragment of the locator structure (stm_loc_st): its status word. */
65 unsigned long status; /* TXS_FAILED, TXS_SUCCESSFUL, descriptor. */

/* Per-block entry on a transaction's read or write list. */
74 struct stm_tx_entry_st {

/* Fragment of the transaction descriptor (stm_tx_st): bump-allocation
 * pointer for tx entries, and the rotating incremental-validation cursor. */
87 stm_tx_entry *alloc_ptr, *check;
89 int gc_data_id, blk_size; /* copied from 'stm' structure */

98 /* Private per-thread state. The array is indexed off ptst->id. */
100 void *arena, *arena_lim;
101 stm_tx *next_descriptor;
104 unsigned int random_counter;

109 static priv_t priv_ptst[MAX_THREADS];
110 static int gc_blk_id; /* Allocation id for block descriptors. */
111 static int gc_loc_id; /* Allocation id for locators. */
112 static int do_padding; /* Should all allocations be padded to a cache line? */

/* Contention-management tunables: bounded retries with randomised
 * exponential backoff (see contention_wait()). */
115 #define MAX_RETRIES 8
117 #define MIN_LOG_BACKOFF 4
118 #define MAX_LOG_BACKOFF 31
119 #define RANDOM_BITS 8
120 #define RANDOM_SIZE (1 << RANDOM_BITS)
121 #define RANDOM_MASK (RANDOM_SIZE - 1)
122 static unsigned int rand_arr[RANDOM_SIZE];

126 static stm_blk *dummy_obj; /* Dummy object (used by red-black trees). */
127 static void *dummy_data;

/* Round every GC allocation up to a cache line when padding is enabled. */
129 #define ALLOCATOR_SIZE(_s) (do_padding ? CACHE_LINE_SIZE : (_s))

131 #define ARENA_SIZE 40960
132 #define DESCRIPTOR_SIZE 4096

/* Status encodings shared by transactions and locators. */
134 #define TXS_IN_PROGRESS 0U
135 #define TXS_FAILED 1U
136 #define TXS_SUCCESSFUL 2U

/* A 4-byte-aligned value is a descriptor pointer rather than a small
 * status code.  NOTE(review): TXS_IN_PROGRESS (0) also satisfies this
 * test -- presumably a locator's status is never left at 0; verify
 * against the full file. */
138 #define is_descriptor(_p) (((unsigned long)(_p) & 3) == 0)
139 #define mk_descriptor(_p) ((stm_tx *)(_p))

141 /* Is transaction read-only? */
142 #define read_only(_t) ((_t)->writes == NULL)

144 /* Is transaction definitely doomed to fail? */
145 #define is_stale(_t, _e) \
146 (((_t)->status != TXS_IN_PROGRESS) || ((_e)->b->loc != (_e)->l))

/* Forward declaration: the read/write fail paths commit (i.e. clean up)
 * an already-aborted transaction before longjmp'ing out. */
148 bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t);
150 static void new_arena (priv_t *priv, int size)
152 priv->arena = malloc(size);
153 if ( priv->arena == NULL ) abort();
154 priv->arena_lim = (((char *) priv->arena) + size);
/*
 * Return transaction descriptor <t> to this thread's private free list
 * (a singly-linked list threaded through next_free).
 * NOTE(review): lines are elided between the visible statements (the
 * numbering jumps 160 -> 163) -- code kept byte-identical.
 */
157 static void release_descriptor(ptst_t *ptst, stm_tx *t)
160 priv_t *priv = &priv_ptst[ptst->id];
/* Push onto the per-thread free list; no synchronisation needed as the
 * list is private to this thread. */
163 t->next_free = priv->next_descriptor;
164 priv->next_descriptor = t;
/*
 * Atomically add <delta> to t->rc with a CAS retry loop.
 * NOTE(review): the loop header that reloads 'rc' from 'new_rc' on each
 * iteration (original lines between 169 and 172) is elided here.  As
 * shown, 'rc' looks uninitialised -- presumably the full file contains
 * "do { rc = new_rc; }" before the while; verify before editing.
 */
167 static int rc_delta_descriptor(stm_tx *t, int delta)
169 int rc, new_rc = t->rc;
172 while ( (new_rc = CASIO (&t->rc, rc, rc + delta)) != rc );
177 static void rc_up_descriptor(stm_tx *t)
179 rc_delta_descriptor(t, 2);
/*
 * Drop a reference on descriptor <t>; the thread that removes the final
 * reference recycles the descriptor onto its own free list.
 * NOTE(review): the loop body computing old_rc/new_rc from cur_rc is on
 * elided lines (numbering jumps 185 -> 192) -- code kept byte-identical.
 */
183 static void rc_down_descriptor(ptst_t *ptst, stm_tx *t)
185 int old_rc, new_rc, cur_rc = t->rc;
/* A count that would reach zero is stored as 1: the low bit marks the
 * descriptor as freed while keeping the word non-zero. */
192 if ( new_rc == 0 ) new_rc = 1;
194 while ( (cur_rc = CASIO (&t->rc, old_rc, new_rc)) != old_rc );
/* old_rc == 2 means we just removed the last reference. */
196 if ( old_rc == 2 ) release_descriptor(ptst, t);
/*
 * Obtain a transaction descriptor: pop the per-thread free list when
 * possible, otherwise carve DESCRIPTOR_SIZE bytes out of the bump arena
 * (starting a new arena if the current one is exhausted).
 * NOTE(review): braces, the rc initialisation for arena-fresh descriptors
 * and the return statement are on elided lines -- code kept byte-identical.
 */
199 static stm_tx *new_descriptor(priv_t *priv)
203 t = priv->next_descriptor;
207 priv->next_descriptor = t->next_free;
208 /* 'Unfree' descriptor, if it was previously freed. */
209 if ( (t->rc & 1) == 1 ) rc_delta_descriptor(t, 1);
/* Free list empty: bump-allocate from the arena instead. */
213 t = (stm_tx *) priv->arena;
214 priv->arena = ((char *) (priv->arena)) + DESCRIPTOR_SIZE;
/* Ran past the arena's end: allocate a new arena and redo the carve. */
216 if ( priv->arena >= priv->arena_lim )
218 new_arena(priv, ARENA_SIZE);
219 t = (stm_tx *) priv->arena;
220 priv->arena = ((char *) (priv->arena)) + DESCRIPTOR_SIZE;
/*
 * Bump-allocate one read/write-list entry from the space trailing the
 * descriptor itself; the assert guards against running past the
 * descriptor's fixed allocation.
 * NOTE(review): the assert's right-hand operand (presumably
 * DESCRIPTOR_SIZE) and the return are on elided lines.
 */
231 static stm_tx_entry *alloc_stm_tx_entry(stm_tx *t)
233 stm_tx_entry *ent = t->alloc_ptr++;
234 assert(((unsigned long)t->alloc_ptr - (unsigned long)t) <=
/*
 * Walk an entry list kept sorted by block address.  Returns the link
 * that either points at the entry for <b> or is the correct insertion
 * point for a new entry for <b>.
 * NOTE(review): the loop body and return are on elided lines.
 */
240 static stm_tx_entry **search_stm_tx_entry(stm_tx_entry **pnext, stm_blk *b)
242 stm_tx_entry *next = *pnext;
/* Advance while the current entry's block sorts before <b>. */
244 while ( (next != NULL) && ((unsigned long)next->b < (unsigned long)b) )
/*
 * Contention manager.  After the first couple of raw retries, sleep for
 * a randomised, exponentially growing number of nanoseconds (SPARC
 * builds only, kept exactly as the original authors wrote it).
 * Returns nonzero while the caller should keep retrying.
 * NOTE(review): braces and (presumably) the rqtp.tv_sec initialisation
 * are on elided lines -- code kept byte-identical.
 */
254 static int contention_wait(ptst_t *ptst, int attempts)
257 if ( (attempts > 1) && (attempts <= MAX_RETRIES) )
259 #ifdef SPARC /* Exactly as it was done by the original authors. */
260 priv_t *priv = &priv_ptst[ptst->id];
261 struct timespec rqtp;
262 unsigned int log_backoff, mask;
/* Backoff window doubles per failed attempt, clamped at MAX_LOG_BACKOFF. */
263 log_backoff = attempts - 2 + MIN_LOG_BACKOFF;
264 if ( log_backoff > MAX_LOG_BACKOFF )
265 log_backoff = MAX_LOG_BACKOFF;
266 mask = (1 << log_backoff) - 1;
/* Pick a pre-generated random delay, masked into the backoff window. */
267 rqtp.tv_nsec = rand_arr[priv->random_counter++ & RANDOM_MASK] & mask;
/* Restart nanosleep if it is interrupted before the delay elapses. */
269 while ( nanosleep(&rqtp, NULL) != 0 ) continue;
275 return attempts < MAX_RETRIES;
/*
 * Resolve locator <l> to its current data pointer.  If the locator is
 * still owned by an in-flight transaction, help settle it: fold a
 * finished owner's outcome into the locator's status, or back off and
 * eventually fail a still-running owner via CAS on its status.
 * NOTE(review): heavy elision here -- the switch case labels, the
 * rc_up_descriptor call implied by the later rc_down calls, and all
 * return statements are missing.  Code kept byte-identical.
 */
282 static void *read_loc_data(ptst_t *ptst, stm_loc *l)
292 switch ( (st = l->status) )
/* Status holds a descriptor: the locator is owned by a transaction. */
299 t = mk_descriptor(st);
/* Recheck ownership -- the status may have changed under us. */
301 if ( l->status == st )
/* Owner committed: make the outcome sticky in the locator. */
306 rc_down_descriptor(ptst, t);
307 l->status = TXS_SUCCESSFUL;
/* Owner failed: likewise record the failure. */
310 rc_down_descriptor(ptst, t);
311 l->status = TXS_FAILED;
/* Owner still in progress: back off; once retries are exhausted,
 * try to doom the owner so we can make progress. */
314 if ( !contention_wait(ptst, ++attempts) )
317 CASIO(&t->status, TXS_IN_PROGRESS, TXS_FAILED);
321 rc_down_descriptor(ptst, t);
/*
 * Build and publish a new locator for block <b> on behalf of transaction
 * <t>: 'old' keeps the pre-transaction data, 'new' gets a private copy
 * for the transaction to mutate.  Publishing is a CAS on b->loc; on a
 * lost race the speculative allocations are freed immediately (nobody
 * else ever saw them) and, per the visible fail path, NULL is returned
 * to the caller (see write_stm_blk).
 * NOTE(review): braces and return statements are on elided lines.
 */
327 static stm_loc *install_loc(ptst_t *ptst, stm_tx *t,
328 stm_blk *b, stm_loc *old_loc)
330 stm_loc *new_loc = gc_alloc(ptst, gc_loc_id);
/* The locator is born owned by <t>: status holds the descriptor. */
332 new_loc->status = (unsigned long)t;
333 new_loc->new = gc_alloc(ptst, t->gc_data_id);
334 new_loc->old = read_loc_data(ptst, old_loc);
335 memcpy(new_loc->new, new_loc->old, t->blk_size);
/* Publish; on failure unwind with the cheap "unsafe" free. */
337 if ( CASPO(&b->loc, old_loc, new_loc) != old_loc )
339 gc_unsafe_free(ptst, new_loc->new, t->gc_data_id);
340 gc_unsafe_free(ptst, new_loc , gc_loc_id);
/* Success: displaced locator is reclaimed via the quiescence-safe free. */
345 gc_free(ptst, old_loc, gc_loc_id);
352 stm *new_stm(ptst_t *ptst, int blk_size)
354 stm *mem = malloc(CACHE_LINE_SIZE);
355 mem->blk_size = blk_size;
356 mem->gc_data_id = gc_add_allocator(ALLOCATOR_SIZE(blk_size));
/*
 * Tear down an STM universe: retire its GC allocation class.
 * NOTE(review): the elided lines presumably release <mem> itself --
 * verify against the full file.
 */
361 void free_stm(ptst_t *ptst, stm *mem)
363 gc_remove_allocator(mem->gc_data_id);
/*
 * Allocate a fresh block: a block header, plus a quiescent locator
 * (status TXS_SUCCESSFUL, i.e. not owned by any transaction) with a
 * newly allocated data area.
 * NOTE(review): the lines wiring the locator into b->loc and returning
 * <b> are elided -- code kept byte-identical.
 */
368 stm_blk *new_stm_blk(ptst_t *ptst, stm *mem)
370 stm_blk *b = gc_alloc(ptst, gc_blk_id);
371 stm_loc *l = gc_alloc(ptst, gc_loc_id);
373 l->status = TXS_SUCCESSFUL;
375 l->new = gc_alloc(ptst, mem->gc_data_id);
/*
 * Free a block: atomically detach its locator (FASPO swings b->loc to
 * NULL, so concurrent readers observe a NULL locator -- see the NULL
 * checks in read_stm_blk/write_stm_blk and the SIGSEGV handler), then
 * reclaim data, locator and block header through the GC.
 */
380 void free_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
385 l = FASPO(&b->loc, NULL);
/* Settle the locator first so we reclaim the live data pointer. */
386 data = read_loc_data(ptst, l);
388 gc_free(ptst, data, mem->gc_data_id);
389 gc_free(ptst, l, gc_loc_id);
390 gc_free(ptst, b, gc_blk_id);
/* Return a pointer to a (new) block's data area for initialisation.
 * NOTE(review): the entire body is elided in this extract. */
394 void *init_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
400 int sizeof_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
402 return mem->blk_size;
/*
 * Begin a transaction in universe <mem>: grab a descriptor, reset its
 * status, read/write lists and entry arena, and copy the universe's GC
 * id and block size into it.  Nested transactions are rejected.
 * NOTE(review): the lines storing <penv>, publishing priv->cur_tx, the
 * return, and the 'nesting:' label/abort path are elided.
 */
406 stm_tx *new_stm_tx(ptst_t *ptst, stm *mem, sigjmp_buf *penv)
408 priv_t *priv = &priv_ptst[ptst->id];
411 if ( priv->cur_tx != NULL ) goto nesting;
412 t = new_descriptor(priv);
413 t->status = TXS_IN_PROGRESS;
414 t->reads = t->writes = NULL;
/* The entry arena starts immediately after the descriptor header. */
415 t->alloc_ptr = t->check = (stm_tx_entry *)(t + 1);
416 t->gc_data_id = mem->gc_data_id;
417 t->blk_size = mem->blk_size;
424 fprintf(stderr, "No nesting of transactions is allowed\n");
/*
 * Attempt to commit transaction <t>; returns TRUE on success.  The read
 * set is validated against each block's current locator.  A read-only
 * transaction never published its descriptor, so it can be recycled
 * immediately; otherwise the outcome is CASed into t->status (helpers
 * racing via read_loc_data may have settled it first) and then stamped
 * into every written locator.
 * NOTE(review): many lines are elided here (braces, the read_only()
 * test guarding the fast path, loop-body braces, and the call that line
 * 467 is an argument to -- presumably a gc free of the losing old/new
 * data copy).  Code kept byte-identical.
 */
429 bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t)
431 unsigned int desired_st = TXS_SUCCESSFUL, st;
433 priv_t *priv = &priv_ptst[ptst->id];
/* Validate the read set: any block whose locator moved dooms us. */
439 for ( ent = t->reads; ent != NULL; ent = ent->next )
441 if ( ent->b->loc != ent->l )
442 desired_st = TXS_FAILED;
447 /* A very fast path: we can immediately reuse the descriptor. */
448 if ( t->dummy != NULL )
449 gc_unsafe_free(ptst, t->dummy, t->gc_data_id);
450 t->next_free = priv->next_descriptor;
451 priv->next_descriptor = t;
452 return desired_st == TXS_SUCCESSFUL;
/* Writer path: race to install the final status. */
455 st = CASIO(&t->status, TXS_IN_PROGRESS, desired_st);
456 if ( st == TXS_IN_PROGRESS )
459 assert((st == TXS_FAILED) || (st == TXS_SUCCESSFUL));
/* Propagate the settled outcome into every written locator; line 467
 * selects which data copy (old on success, new on failure) is passed
 * to the elided reclamation call. */
463 for ( ent = t->writes; ent != NULL; ent = ent->next )
465 ent->l->status = (unsigned long)st;
467 (st == TXS_SUCCESSFUL) ? ent->l->old : ent->l->new,
471 if ( t->dummy != NULL )
472 gc_unsafe_free(ptst, t->dummy, t->gc_data_id);
/* Drop the transaction's own reference on its descriptor. */
474 rc_down_descriptor(ptst, t);
476 return st == TXS_SUCCESSFUL;
/*
 * Check whether <t> can still commit.  Conflicts on pending updates
 * surface as t->status already holding TXS_FAILED; reads must be
 * revalidated explicitly against each block's current locator.  On any
 * conflict the transaction's status is forced to TXS_FAILED.
 * NOTE(review): braces, gotos/labels and the return statements are on
 * elided lines -- code kept byte-identical.
 */
480 bool_t validate_stm_tx(ptst_t *ptst, stm_tx *t)
486 /* A conflict on a pending update will cause us to get failed. */
487 if ( t->status == TXS_FAILED )
490 /* Reads must be explicitly checked. */
491 for ( ent = t->reads; ent != NULL; ent = ent->next )
493 if ( ent->b->loc != ent->l )
500 t->status = TXS_FAILED;
505 void abort_stm_tx(ptst_t *ptst, stm_tx *t)
507 t->status = TXS_FAILED;
/*
 * Open block <b> for reading in transaction <t>; returns a pointer to
 * the data the transaction should read.  The global dummy object gets a
 * lazily-made private copy.  Otherwise the write list, then the read
 * list, is consulted before a fresh read entry is recorded.  One entry
 * per call is incrementally revalidated via the rotating 'check'
 * cursor; a doomed transaction is aborted, cleaned up and exited via
 * siglongjmp back to its saved environment.
 * NOTE(review): heavy elision -- returns, the read-list insertion, the
 * 'found:'/'fail:' labels and the penv lookup are on missing lines.
 */
511 void *read_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b)
513 stm_tx_entry **pent, *ent;
/* Dummy object: serve a private copy, created on first touch. */
517 if ( b == dummy_obj )
519 if ( t->dummy == NULL )
521 t->dummy = gc_alloc(ptst, t->gc_data_id);
522 memcpy(t->dummy, dummy_data, t->blk_size);
/* Already opened for writing? */
527 pent = search_stm_tx_entry(&t->writes, b);
529 if ( (ent != NULL) && (ent->b == b) ) goto found;
/* Already opened for reading? */
531 pent = search_stm_tx_entry(&t->reads, b);
533 if ( (ent != NULL) && (ent->b == b) ) goto found;
/* First access: record the locator snapshot and its data. */
535 ent = alloc_stm_tx_entry(t);
/* A NULL locator means the block was freed under us. */
537 if ( (ent->l = b->loc) == NULL )
539 ent->data = read_loc_data(ptst, ent->l);
/* Incremental validation: check one previously-opened entry per call. */
548 if ( is_stale(t, ent) ) goto fail;
549 if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1);
/* fail: abort, clean up, and longjmp out of the transaction. */
554 abort_stm_tx(ptst, t);
555 commit_stm_tx(ptst, t);
556 siglongjmp(*penv, 0);
/*
 * Open block <b> for writing in transaction <t>; returns a pointer to a
 * private, mutable copy of the block's data.  Checks the write list,
 * then the read list (a read entry found there is upgraded -- the
 * upgrade code itself is elided); a first access installs a new locator
 * via install_loc().  Same incremental validation and fail path as
 * read_stm_blk().
 * NOTE(review): heavy elision -- returns, list insertions/upgrades, the
 * 'found:'/'fail:' labels and the penv lookup are on missing lines.
 */
562 void *write_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b)
564 stm_tx_entry **r_pent, **w_pent, *ent;
/* Dummy object: serve a private copy, created on first touch. */
569 if ( b == dummy_obj )
571 if ( t->dummy == NULL )
573 t->dummy = gc_alloc(ptst, t->gc_data_id);
574 memcpy(t->dummy, dummy_data, t->blk_size);
/* Already opened for writing? */
579 w_pent = search_stm_tx_entry(&t->writes, b);
581 if ( (ent != NULL) && (ent->b == b) ) goto found;
/* Opened for reading: the elided code presumably upgrades the entry. */
583 r_pent = search_stm_tx_entry(&t->reads, b);
585 if ( (ent != NULL) && (ent->b == b) )
/* First access: install a fresh locator owned by this transaction. */
591 ent = alloc_stm_tx_entry(t);
/* A NULL locator means the block was freed under us. */
593 if ( (ent->l = b->loc) == NULL )
597 loc = install_loc(ptst, t, b, ent->l);
/* install_loc() returns NULL when it loses the publication race. */
598 if ( loc == NULL ) goto fail;
601 ent->data = loc->new;
/* Incremental validation: check one previously-opened entry per call. */
610 if ( is_stale(t, ent) ) goto fail;
611 if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1);
/* fail: abort, clean up, and longjmp out of the transaction. */
616 abort_stm_tx(ptst, t);
617 commit_stm_tx(ptst, t);
618 siglongjmp(*penv, 0);
/*
 * NOTE(review): only a fragment is visible.  On first use this appears
 * to capture <b> as the global dummy object, snapshotting its current
 * data into dummy_data; the rest of the body (including whatever
 * actually removes <b> from the transaction) is elided -- verify
 * against the full file.
 */
624 void remove_from_tx(ptst_t *ptst, stm_tx *t, stm_blk *b)
626 if ( dummy_obj == NULL )
629 dummy_data = read_loc_data(ptst, b->loc);
/*
 * SIGSEGV handler.  A fault while a no-longer-valid transaction is
 * running is expected (free_stm_blk() leaves NULL locators behind), so
 * the transaction is failed, cleaned up via commit_stm_tx(), and
 * control longjmps back to the transaction's restart point.  Any other
 * SEGV is reported as fatal.
 * NOTE(review): the critical_exit call(s) and the fatal-exit path after
 * the fprintf are on elided lines -- code kept byte-identical.
 */
634 static void handle_fault(int sig)
639 ptst = critical_enter();
640 t = priv_ptst[ptst->id].cur_tx;
641 if ( (t != NULL) && !validate_stm_tx(ptst, t) )
643 sigjmp_buf *penv = t->penv;
/* commit_stm_tx() on an already-failed transaction just cleans up. */
644 commit_stm_tx(ptst, t);
646 siglongjmp(*penv, 0);
650 fprintf(stderr, "Error: unhandleable SIGSEGV!\n");
/*
 * One-time subsystem initialisation: probe that nanosleep() works (it
 * backs the contention manager), pre-generate the backoff randomness
 * table, register GC allocation classes for block headers and locators,
 * zero the per-thread state, and install the SIGSEGV handler.
 * NOTE(review): the rqtp initialisation, the failure exit after the
 * printf, and the sa_flags setup are on elided lines.
 */
655 void _init_stm_subsystem(int pad_data)
657 struct sigaction act;
661 struct timespec rqtp;
/* Probe nanosleep support, retrying through signal interruptions. */
666 while ( nanosleep(&rqtp, NULL) != 0 )
668 if ( errno != EINTR )
670 printf("Urk! Nanosleep not supported!\n");
/* Pre-generate random numbers so backoff never calls random() itself. */
675 for ( i = 0; i < RANDOM_SIZE; i++ )
676 rand_arr[i] = (unsigned int)random();
679 do_padding = pad_data;
680 gc_blk_id = gc_add_allocator(ALLOCATOR_SIZE(sizeof(stm_blk)));
681 gc_loc_id = gc_add_allocator(ALLOCATOR_SIZE(sizeof(stm_loc)));
682 memset(priv_ptst, 0, sizeof(priv_ptst));
/* Route SEGVs (expected on freed blocks mid-transaction) to the STM. */
684 act.sa_handler = handle_fault;
685 sigemptyset(&act.sa_mask);
687 sigaction(SIGSEGV, &act, NULL);