1 /******************************************************************************
4 * Skip lists, allowing concurrent update by use of CAS primitives.
6 * Matt Benjamin <matt@linuxbox.com>
8 * Adapts MCAS skip list to use a pointer-key and typed comparison
9 * function style (because often, your key isn't an integer).
11 * Also, set_for_each (and future additions) allow a set to be iterated.
12 * Current set_for_each iteration is unordered.
14 * Caution, pointer values 0x0, 0x01, and 0x02 are reserved. Fortunately,
15 * no real pointer is likely to have one of these values.
17 Copyright (c) 2003, Keir Fraser All rights reserved.
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are
23 * Redistributions of source code must retain the above copyright
24 * notice, this list of conditions and the following disclaimer.
25 * Redistributions in binary form must reproduce the above
26 * copyright notice, this list of conditions and the following
27 * disclaimer in the documentation and/or other materials provided
28 * with the distribution. Neither the name of the Keir Fraser
29 * nor the names of its contributors may be used to endorse or
30 * promote products derived from this software without specific
31 * prior written permission.
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
34 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
35 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
36 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
37 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
38 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
39 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
40 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
41 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
42 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
43 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* NOTE(review): identifier uses leading double underscore, which is reserved
 * for the implementation (C11 7.1.3) -- kept as-is because other files in
 * the project test this exact macro name. */
46 #define __SET_IMPLEMENTATION__
51 #include "portable_defns.h"
53 #include "set_queue_adt.h"
/* Global seed/key left over from the original test harness; the author's
 * own "todo" marks it for removal -- confirm no external file links to it. */
55 // todo: get rid of me
58 unsigned long secret_key;
/* Shared node pointers are VOLATILE-qualified: nodes are read and CAS'd
 * concurrently by multiple threads without locks. */
67 typedef struct node_st node_t;
68 typedef struct set_st osi_set_t;
69 typedef VOLATILE node_t *sh_node_pt;
71 // int xxx[1024]; /* XXXX testing gc */
/* A node's @level field packs two things: the skip-list height in the low
 * bits (LEVEL_MASK) and a deletion-claim flag (READY_FOR_FREE) in bit 8;
 * see check_for_full_delete() below. */
74 #define LEVEL_MASK 0x0ff
75 #define READY_FOR_FREE 0x100
/* User-supplied three-way comparator: <0, 0, >0 like strcmp/memcmp. */
81 typedef int (*osi_set_cmp_func) (const void *lhs, const void *rhs);
/* NOTE(review): this field line is part of struct set_st whose braces are
 * elided from this listing; the set stores its comparator here. */
85 osi_set_cmp_func cmpf;
/* One garbage-collector allocator id per node height (nodes of height h
 * carry h forward pointers, so each height is its own size class). */
91 static int gc_id[NUM_LEVELS];
/* Compare two keys through the set's comparator. */
97 #define compare_keys(s, k1, k2) (s->cmpf((const void*) k1, (const void *) k2))
100 * Random level generator. Drop-off rate is 0.5 per level.
101 * Returns value 1 <= level <= NUM_LEVELS.
104 get_level(ptst_t * ptst)
106 unsigned long r = rand_next(ptst);
/* Discard the low 4 bits (weak in simple PRNGs), then keep at most
 * NUM_LEVELS-1 candidate bits; the level is presumably derived from the
 * position of the lowest set/clear bit -- that logic is elided from this
 * listing, so confirm against the full source. */
108 r = (r >> 4) & ((1 << (NUM_LEVELS - 1)) - 1);
118 * Allocate a new node, and initialise its @level field.
119 * NB. Initialisation will eventually be pushed into garbage collector,
120 * because of dependent read reordering.
123 alloc_node(ptst_t * ptst)
/* Allocate from the per-height GC size class; @l is the randomly chosen
 * level (1-based), gc_id[] is 0-based.  Level selection and the store of
 * n->level are elided from this listing. */
128 n = gc_alloc(ptst, gc_id[l - 1]);
134 /* Free a node to the garbage collector. */
136 free_node(ptst_t * ptst, sh_node_pt n)
/* Size class is recovered from the node's own height (low bits of @level;
 * the READY_FOR_FREE flag in bit 8 is masked off).  gc_free defers the
 * actual reclamation until no thread can still hold a reference. */
138 gc_free(ptst, (void *)n, gc_id[(n->level & LEVEL_MASK) - 1]);
143 * Search for first non-deleted node, N, with key >= @k at each level in @l.
145 * Array @pa: @pa[i] is non-deleted predecessor of N at level i
146 * Array @na: @na[i] is N itself, which should be pointed at by @pa[i]
147 * MAIN RETURN VALUE: same as @na[0].
/* "Strong" search: as a side effect it physically unlinks marked (deleted)
 * nodes it passes, via the CASPO at line 188.  Restart/retry targets and
 * the @pa/@na stores are elided from this listing. */
150 strong_search_predecessors(osi_set_t * l, setkey_t k, sh_node_pt * pa,
153 sh_node_pt x, x_next, old_x_next, y, y_next;
/* Walk levels top-down; each level starts from the predecessor found at
 * the level above. */
161 for (i = NUM_LEVELS - 1; i >= 0; i--) {
162 /* We start our search at previous level's unmarked predecessor. */
163 READ_FIELD(x_next, x->next[i]);
164 /* If this pointer's marked, so is @pa[i+1]. May as well retry. */
165 if (is_marked_ref(x_next))
168 for (y = x_next;; y = y_next) {
169 /* Shift over a sequence of marked nodes. */
171 READ_FIELD(y_next, y->next[i]);
172 if (!is_marked_ref(y_next))
/* y_next carries the mark bit; strip it to follow the real pointer. */
174 y = get_unmarked_ref(y_next);
177 READ_FIELD(y_k, y->k);
/* Stop at the first (non-deleted) node whose key is >= @k. */
178 if (compare_keys(l, y_k, k) >= 0)
181 /* Update estimate of predecessor at this level. */
186 /* Swing forward pointer over any marked nodes. */
/* Single CAS unlinks the whole run of marked nodes between x and y.
 * Failure means someone raced us; the retry path is elided here. */
188 old_x_next = CASPO(&x->next[i], x_next, y);
189 if (old_x_next != x_next)
203 /* This function does not remove marked nodes. Use it optimistically. */
/* "Weak" variant of the search above: read-only, simply skips over mark
 * bits without unlinking deleted nodes, so it never CASes and never
 * retries.  Same @pa/@na contract as strong_search_predecessors; the
 * array stores and return are elided from this listing. */
205 weak_search_predecessors(osi_set_t * l, setkey_t k, sh_node_pt * pa,
208 sh_node_pt x, x_next;
213 for (i = NUM_LEVELS - 1; i >= 0; i--) {
215 READ_FIELD(x_next, x->next[i]);
/* Ignore the deletion mark; we only want the successor pointer. */
216 x_next = get_unmarked_ref(x_next);
218 READ_FIELD(x_next_k, x_next->k);
/* Advance until the next node's key is >= @k at this level. */
219 if (compare_keys(l, x_next_k, k) >= 0)
236 * Mark @x deleted at every level in its list from @level down to level 1.
237 * When all forward pointers are marked, node is effectively deleted.
238 * Future searches will properly remove node by swinging predecessors'
/* Logical deletion: set the mark bit on each forward pointer, from the
 * node's top level down to level 0.  @level is the 1-based height (mask
 * READY_FOR_FREE off before calling -- see callers). */
242 mark_deleted(sh_node_pt x, int level)
246 while (--level >= 0) {
247 x_next = x->next[level];
/* CAS loop: retry until the mark bit sticks; CASPO returns the observed
 * old value, which becomes the next attempt's expected value. */
248 while (!is_marked_ref(x_next)) {
249 x_next = CASPO(&x->next[level], x_next, get_marked_ref(x_next));
251 WEAK_DEP_ORDER_WMB(); /* mark in order */
/* Try to claim responsibility for fully unlinking @x by setting the
 * READY_FOR_FREE flag in its @level field.  Returns nonzero iff the flag
 * was already set, or our CAS lost a race (someone else touched @level
 * concurrently).  Callers invoke do_full_delete() when this returns true
 * -- i.e. the *second* party to notice performs the physical delete; see
 * the call sites in update/remove below. */
257 check_for_full_delete(sh_node_pt x)
259 int level = x->level;
260 return ((level & READY_FOR_FREE) ||
261 (CASIO(&x->level, level, level | READY_FOR_FREE) != level));
/* Physically unlink a fully-marked node @x (key @k, read from @x in an
 * elided line) from every level, then hand it to the GC.  The final
 * free_node call is elided from this listing. */
266 do_full_delete(ptst_t * ptst, osi_set_t * l, sh_node_pt x, int level)
/* On weakly-ordered machines we must additionally prove that no stale
 * forward pointer can still lead a reader to @x before freeing it. */
269 #ifdef WEAK_MEM_ORDER
270 sh_node_pt preds[NUM_LEVELS];
/* Strong search unlinks @x as a side effect and records predecessors. */
273 (void)strong_search_predecessors(l, k, preds, NULL);
275 * Above level 1, references to @x can disappear if a node is inserted
276 * immediately before and we see an old value for its forward pointer. This
277 * is a conservative way of checking for that situation.
/* Re-scan each level from the recorded predecessor; as long as keys are
 * still < @k we have not passed @x's position, so keep reading with RMBs
 * until the level provably no longer reaches @x. Retry framing elided. */
282 node_t *n = get_unmarked_ref(preds[i]->next[i]);
283 while (compare_keys(l, n->k, k) < 0) {
284 n = get_unmarked_ref(n->next[i]);
285 RMB(); /* we don't want refs to @x to "disappear" */
289 i--; /* don't need to check this level again, even if we retry. */
/* Strongly-ordered machines: one strong search suffices to unlink @x. */
292 (void)strong_search_predecessors(l, k, NULL, NULL);
303 * Called once before any set operations, including set_alloc
/* Register one GC allocator per possible node height: a height-(i+1)
 * node is a node_t plus i extra forward pointers (node_t itself already
 * embeds the first one). */
306 _init_osi_cas_skip_subsystem(void)
310 for (i = 0; i < NUM_LEVELS; i++) {
311 gc_id[i] = gc_add_allocator(sizeof(node_t) + i * sizeof(node_t *));
/* Create an empty set using comparator @cmpf.  The list is bracketed by
 * two full-height sentinel nodes: head (SENTINEL_KEYMIN) and tail
 * (SENTINEL_KEYMAX), so searches never run off either end. */
317 osi_cas_skip_alloc(osi_set_cmp_func cmpf)
/* Tail sentinel @n: allocated at full height.
 * NOTE(review): malloc results are used unchecked here and below -- on
 * OOM this dereferences NULL; consider an xmalloc-style wrapper. */
323 n = malloc(sizeof(*n) + (NUM_LEVELS - 1) * sizeof(node_t *));
324 memset(n, 0, sizeof(*n) + (NUM_LEVELS - 1) * sizeof(node_t *));
325 n->k = (setkey_t *) SENTINEL_KEYMAX;
328 * Set the forward pointers of final node to other than NULL,
329 * otherwise READ_FIELD() will continually execute costly barriers.
330 * Note use of 0xfe -- that doesn't look like a marked value!
332 memset(n->next, 0xfe, NUM_LEVELS * sizeof(node_t *));
/* Head sentinel lives embedded in the set structure itself. */
334 l = malloc(sizeof(*l) + (NUM_LEVELS - 1) * sizeof(node_t *));
336 l->head.k = (setkey_t *) SENTINEL_KEYMIN;
337 l->head.level = NUM_LEVELS;
/* Point every head level at the tail (the stores are elided here). */
338 for (i = 0; i < NUM_LEVELS; i++) {
/* Insert (@k -> @v), or if @k already present and @overwrite is set,
 * replace its value.  Returns the old value on overwrite (return and
 * several retry labels are elided from this listing).  Runs inside a
 * GC critical section. */
347 osi_cas_skip_update(osi_set_t * l, setkey_t k, setval_t v, int overwrite)
351 sh_node_pt preds[NUM_LEVELS], succs[NUM_LEVELS];
352 sh_node_pt pred, succ, new = NULL, new_next, old_next;
355 ptst = critical_enter();
/* Optimistic read-only search first; fall back to strong search on races. */
357 succ = weak_search_predecessors(l, k, preds, succs);
362 if (compare_keys(l, succ->k, k) == 0) {
363 /* Already a @k node in the list: update its mapping. */
/* A NULL value marks a node logically deleted (see remove below). */
366 if ((ov = new_ov) == NULL) {
367 /* Finish deleting the node, then retry. */
368 READ_FIELD(level, succ->level);
369 mark_deleted(succ, level & LEVEL_MASK);
370 succ = strong_search_predecessors(l, k, preds, succs);
/* CAS the value in place; loop while other writers race us. */
373 } while (overwrite && ((new_ov = CASPO(&succ->v, ov, v)) != ov));
/* Key already existed: discard the node we may have pre-allocated. */
376 free_node(ptst, new);
379 #ifdef WEAK_MEM_ORDER
380 /* Free node from previous attempt, if this is a retry. */
382 free_node(ptst, new);
387 /* Not in the list, so initialise a new node for insertion. */
389 new = alloc_node(ptst);
395 /* If successors don't change, this saves us some CAS operations. */
396 for (i = 0; i < level; i++) {
397 new->next[i] = succs[i];
400 /* We've committed when we've inserted at level 1. */
401 WMB_NEAR_CAS(); /* make sure node fully initialised before inserting */
/* Linearisation point of a successful insert: splice at level 0. */
402 old_next = CASPO(&preds[0]->next[0], succ, new);
403 if (old_next != succ) {
404 succ = strong_search_predecessors(l, k, preds, succs);
408 /* Insert at each of the other levels in turn. */
414 /* Someone *can* delete @new under our feet! */
415 new_next = new->next[i];
416 if (is_marked_ref(new_next))
419 /* Ensure forward pointer of new node is up to date. */
420 if (new_next != succ) {
421 old_next = CASPO(&new->next[i], new_next, succ);
422 if (is_marked_ref(old_next))
423 assert(old_next == new_next);
427 /* Ensure we have unique key values at every level. */
428 if (compare_keys(l, succ->k, k) == 0)
431 assert((compare_keys(l, pred->k, k) < 0) &&
432 (compare_keys(l, succ->k, k) > 0));
434 /* Replumb predecessor's forward pointer. */
435 old_next = CASPO(&pred->next[i], succ, new);
436 if (old_next != succ) {
438 RMB(); /* get up-to-date view of the world. */
439 (void)strong_search_predecessors(l, k, preds, succs);
443 /* Succeeded at this level. */
448 /* Ensure node is visible at all levels before punting deletion. */
449 WEAK_DEP_ORDER_WMB();
/* A concurrent remove may have marked @new mid-insert; if we are the
 * one who claims READY_FOR_FREE, finish the physical delete ourselves. */
450 if (check_for_full_delete(new)) {
451 MB(); /* make sure we see all marks in @new. */
452 do_full_delete(ptst, l, new, level - 1);
/* Remove @k from the set.  Returns the removed value, or NULL if absent
 * (return statements are elided from this listing).  Runs inside a GC
 * critical section. */
460 osi_cas_skip_remove(osi_set_t * l, setkey_t k)
462 setval_t v = NULL, new_v;
464 sh_node_pt preds[NUM_LEVELS], x;
467 ptst = critical_enter();
469 x = weak_search_predecessors(l, k, preds, NULL);
/* First node with key >= @k is strictly greater => @k not present. */
470 if (compare_keys(l, x->k, k) > 0)
473 READ_FIELD(level, x->level);
474 level = level & LEVEL_MASK;
476 /* Once we've marked the value field, the node is effectively deleted. */
/* CAS @x->v to NULL; loop on racing value-updates (loop head elided). */
482 } while ((new_v = CASPO(&x->v, v, NULL)) != v);
484 /* Committed to @x: mark lower-level forward pointers. */
485 WEAK_DEP_ORDER_WMB(); /* enforce above as linearisation point */
486 mark_deleted(x, level);
489 * We must swing predecessors' pointers, or we can end up with
490 * an unbounded number of marked but not fully deleted nodes.
491 * Doing this creates a bound equal to number of threads in the system.
492 * Furthermore, we can't legitimately call 'free_node' until all shared
493 * references are gone.
/* Unlink top-down; if any level's CAS fails, fall back to the full
 * (race-tolerant) delete path, coordinated via READY_FOR_FREE. */
495 for (i = level - 1; i >= 0; i--) {
496 if (CASPO(&preds[i]->next[i], x, get_unmarked_ref(x->next[i])) != x) {
497 if ((i != (level - 1)) || check_for_full_delete(x)) {
498 MB(); /* make sure we see node at all levels. */
499 do_full_delete(ptst, l, x, i);
/* Look up @k.  Read-only: uses the weak (non-unlinking) search; on an
 * exact key match the value is read out (the read and return are elided
 * from this listing).  Runs inside a GC critical section. */
514 osi_cas_skip_lookup(osi_set_t * l, setkey_t k)
520 ptst = critical_enter();
522 x = weak_search_predecessors(l, k, NULL, NULL);
523 if (compare_keys(l, x->k, k) == 0)
531 /* Hybrid Set/Queue Operations (Matt) */
533 /* Iterate over a sequential structure, calling callback_func
534 * on each (undeleted) element visited. */
536 /* Each-element function passed to set_for_each */
537 typedef void (*osi_set_each_func) (osi_set_t * l, setval_t v, void *arg);
/* Unordered iteration (see file header).  Walks every level of the
 * list, invoking @each_func on each non-sentinel node reached.
 * NOTE(review): since a height-h node is linked at h levels, a per-level
 * walk appears able to visit the same node once per level; the elided
 * loop body may deduplicate -- confirm against the full source before
 * relying on exactly-once callback semantics. */
539 osi_cas_skip_for_each(osi_set_t * l, osi_set_each_func each_func, void *arg)
541 sh_node_pt x, y, x_next, old_x_next;
546 ptst = critical_enter();
549 for (i = NUM_LEVELS - 1; i >= 0; i--) {
550 /* must revisit x at each level to see all
551 * nodes in the structure */
555 READ_FIELD(x_next, y->next[i]);
556 x_next = get_unmarked_ref(x_next);
558 READ_FIELD(x_next_k, x_next->k);
/* Tail sentinel reached: this level is exhausted. */
559 if (x_next_k == (setkey_t) SENTINEL_KEYMAX)
562 /* in our variation, a (stored) k is a v,
563 * ie, x_next_k is x_next_v */
564 each_func(l, x_next_k, arg);