1 /******************************************************************************
4 * Skip lists, allowing concurrent update by use of MCAS primitive.
6 * Copyright (c) 2001-2003, K A Fraser
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
12 * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer in the documentation and/or other materials provided
17 * with the distribution. Neither the name of the Keir Fraser
18 * nor the names of its contributors may be used to endorse or
19 * promote products derived from this software without specific
20 * prior written permission.
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #define __SET_IMPLEMENTATION__
41 #include "portable_defns.h"
45 #define MCAS_MARK(_v) ((unsigned long)(_v) & 3)
47 #define PROCESS(_v, _pv) \
48 while ( MCAS_MARK(_v) ) { \
49 mcas_fixup((void **)(_pv), _v); \
53 #define WALK_THRU(_v, _pv) \
54 if ( MCAS_MARK(_v) ) (_v) = read_barrier_lite((void **)(_pv));
56 /* Pull in the MCAS implementation. */
63 typedef struct node_st node_t;
64 typedef struct set_st set_t;
65 typedef VOLATILE node_t *sh_node_pt;
80 static int gc_id[NUM_LEVELS];
87 * Random level generator. Drop-off rate is 0.5 per level.
88 * Returns value 1 <= level <= NUM_LEVELS.
90 static int get_level(ptst_t *ptst)
92 unsigned long r = rand_next(ptst);
94 r = (r >> 4) & ((1 << (NUM_LEVELS-1)) - 1);
95 while ( (r & 1) ) { l++; r >>= 1; }
101 * Allocate a new node, and initialise its @level field.
102 * NB. Initialisation will eventually be pushed into garbage collector,
103 * because of dependent read reordering.
105 static node_t *alloc_node(ptst_t *ptst)
110 n = gc_alloc(ptst, gc_id[l - 1]);
116 /* Free a node to the garbage collector. */
117 static void free_node(ptst_t *ptst, sh_node_pt n)
119 gc_free(ptst, (void *)n, gc_id[n->level - 1]);
124 * Search for first non-deleted node, N, with key >= @k at each level in @l.
126 * Array @pa: @pa[i] is non-deleted predecessor of N at level i
127 * Array @na: @na[i] is N itself, which should be pointed at by @pa[i]
128 * MAIN RETURN VALUE: same as @na[0].
130 static sh_node_pt search_predecessors(
131 set_t *l, setkey_t k, sh_node_pt *pa, sh_node_pt *na)
133 sh_node_pt x, x_next;
140 for ( i = NUM_LEVELS - 1; i >= 0; i-- )
144 READ_FIELD(x_next, x->next[i]);
145 WALK_THRU(x_next, &x->next[i]);
147 READ_FIELD(x_next_k, x_next->k);
148 if ( x_next_k >= k ) break;
154 if ( na ) na[i] = x_next;
160 static setval_t finish_delete(sh_node_pt x, sh_node_pt *preds)
162 per_thread_state_t *mcas_ptst = get_ptst();
164 int level, i, ret = FALSE;
169 READ_FIELD(level, x->level);
171 cd = new_descriptor(mcas_ptst, (level << 1) + 1);
172 cd->status = STATUS_IN_PROGRESS;
173 cd->length = (level << 1) + 1;
175 /* First, the deleted node's value field. */
178 if ( v == NULL ) goto fail;
179 cd->entries[0].ptr = (void **)&x->v;
180 cd->entries[0].old = v;
181 cd->entries[0].new = NULL;
183 for ( i = 0; i < level; i++ )
185 READ_FIELD(x_next, x->next[i]);
186 PROCESS(x_next, &x->next[i]);
187 READ_FIELD(x_next_k, x_next->k);
188 if ( x->k > x_next_k ) { v = NULL; goto fail; }
189 cd->entries[i +1].ptr = (void **)&x->next[i];
190 cd->entries[i +1].old = x_next;
191 cd->entries[i +1].new = preds[i];
192 cd->entries[i+level+1].ptr = (void **)&preds[i]->next[i];
193 cd->entries[i+level+1].old = x;
194 cd->entries[i+level+1].new = x_next;
197 ret = mcas0(mcas_ptst, cd);
198 if ( ret == 0 ) v = NULL;
201 rc_down_descriptor(cd);
210 set_t *set_alloc(void)
216 static int mcas_inited = 0;
217 if ( !CASIO(&mcas_inited, 0, 1) ) mcas_init();
219 n = malloc(sizeof(*n) + (NUM_LEVELS-1)*sizeof(node_t *));
220 memset(n, 0, sizeof(*n) + (NUM_LEVELS-1)*sizeof(node_t *));
221 n->k = SENTINEL_KEYMAX;
224 * Set the forward pointers of final node to other than NULL,
225 * otherwise READ_FIELD() will continually execute costly barriers.
226 * Note use of 0xfc -- that doesn't look like a marked value!
228 memset(n->next, 0xfc, NUM_LEVELS*sizeof(node_t *));
230 l = malloc(sizeof(*l) + (NUM_LEVELS-1)*sizeof(node_t *));
231 l->head.k = SENTINEL_KEYMIN;
232 l->head.level = NUM_LEVELS;
233 for ( i = 0; i < NUM_LEVELS; i++ )
242 setval_t set_update(set_t *l, setkey_t k, setval_t v, int overwrite)
246 sh_node_pt preds[NUM_LEVELS], succs[NUM_LEVELS];
247 sh_node_pt succ, new = NULL;
249 per_thread_state_t *mcas_ptst = NULL;
252 k = CALLER_TO_INTERNAL_KEY(k);
254 ptst = critical_enter();
260 succ = search_predecessors(l, k, preds, succs);
264 /* Already a @k node in the list: update its mapping. */
265 READ_FIELD(new_ov, succ->v);
268 PROCESS(ov, &succ->v);
269 if ( ov == NULL ) goto retry;
271 while ( overwrite && ((new_ov = CASPO(&succ->v, ov, v)) != ov) );
273 if ( new != NULL ) free_node(ptst, new);
277 #ifdef WEAK_MEM_ORDER
278 /* Free node from previous attempt, if this is a retry. */
281 free_node(ptst, new);
286 /* Not in the list, so initialise a new node for insertion. */
289 new = alloc_node(ptst);
294 for ( i = 0; i < new->level; i++ )
296 new->next[i] = succs[i];
299 if ( !mcas_ptst ) mcas_ptst = get_ptst();
300 cd = new_descriptor(mcas_ptst, new->level);
301 cd->status = STATUS_IN_PROGRESS;
302 cd->length = new->level;
303 for ( i = 0; i < new->level; i++ )
305 cd->entries[i].ptr = (void **)&preds[i]->next[i];
306 cd->entries[i].old = succs[i];
307 cd->entries[i].new = new;
309 ret = mcas0(mcas_ptst, cd);
310 rc_down_descriptor(cd);
320 setval_t set_remove(set_t *l, setkey_t k)
324 sh_node_pt preds[NUM_LEVELS], x;
326 k = CALLER_TO_INTERNAL_KEY(k);
328 ptst = critical_enter();
331 x = search_predecessors(l, k, preds, NULL);
332 if ( x->k > k ) goto out;
333 } while ( (v = finish_delete(x, preds)) == NULL );
343 setval_t set_lookup(set_t *l, setkey_t k)
349 k = CALLER_TO_INTERNAL_KEY(k);
351 ptst = critical_enter();
353 x = search_predecessors(l, k, NULL, NULL);
365 void _init_set_subsystem(void)
369 for ( i = 0; i < NUM_LEVELS; i++ )
371 gc_id[i] = gc_add_allocator(sizeof(node_t) + i*sizeof(node_t *));