2 * Copyright 2007 Secure Endpoints Inc.
6 * This software has been released under the terms of the IBM Public
7 * License. For details, see the LICENSE file in the top-level source
8 * directory or online at http://www.openafs.org/dl/license10.html
10 * Thanks to Jan Jannink for B+ tree algorithms.
22 /******************* statistics globals *********************************/
23 afs_uint32 bplus_lookup_hits = 0;
24 afs_uint32 bplus_lookup_hits_inexact = 0;
25 afs_uint32 bplus_lookup_misses = 0;
26 afs_uint32 bplus_lookup_ambiguous = 0;
27 afs_uint32 bplus_create_entry = 0;
28 afs_uint32 bplus_remove_entry = 0;
29 afs_uint32 bplus_build_tree = 0;
30 afs_uint32 bplus_free_tree = 0;
31 afs_uint32 bplus_dv_error = 0;
33 afs_uint64 bplus_lookup_time = 0;
34 afs_uint64 bplus_create_time = 0;
35 afs_uint64 bplus_remove_time = 0;
36 afs_uint64 bplus_build_time = 0;
37 afs_uint64 bplus_free_time = 0;
39 /*********************** private functions *************************/
40 static void initFreeNodePool(Tree *B, int quantity);
41 static Nptr getFreeNode(Tree *B);
42 static void putFreeNode(Tree *B, Nptr self);
43 static void cleanupNodePool(Tree *B);
45 static Nptr descendToLeaf(Tree *B, Nptr curr);
46 static int getSlot(Tree *B, Nptr curr);
47 static int findKey(Tree *B, Nptr curr, int lo, int hi);
48 static int bestMatch(Tree *B, Nptr curr, int slot);
50 static Nptr getDataNode(Tree *B, keyT key, dataT data);
51 static Nptr descendSplit(Tree *B, Nptr curr);
52 static void insertEntry(Tree *B, Nptr node, int slot, Nptr sibling, Nptr downPtr);
53 static void placeEntry(Tree *B, Nptr node, int slot, Nptr downPtr);
54 static Nptr split(Tree *B, Nptr node);
55 static void makeNewRoot(Tree *B, Nptr oldRoot, Nptr newNode);
57 static Nptr descendBalance(Tree *B, Nptr curr, Nptr left, Nptr right, Nptr lAnc, Nptr rAnc, Nptr parent);
58 static void collapseRoot(Tree *B, Nptr oldRoot, Nptr newRoot);
59 static void removeEntry(Tree *B, Nptr curr, int slot);
60 static Nptr merge(Tree *B, Nptr left, Nptr right, Nptr anchor);
61 static Nptr shift(Tree *B, Nptr left, Nptr right, Nptr anchor);
63 static void _clrentry(Nptr node, int entry);
64 static void _pushentry(Nptr node, int entry, int offset);
65 static void _pullentry(Nptr node, int entry, int offset);
66 static void _xferentry(Nptr srcNode, int srcEntry, Nptr destNode, int destEntry);
67 static void _setentry(Nptr node, int entry, keyT key, Nptr downNode);
70 static int _isRoot(Tree *B, Nptr n)
72 int flagset = ((n->flags & isROOT) == isROOT);
77 if (flagset && n != getroot(B))
83 static int _isFew(Tree *B, Nptr n)
85 int flagset = ((n->flags & FEWEST) == FEWEST);
86 int fanout = getminfanout(B, n);
87 int entries = numentries(n);
88 int mincnt = entries <= fanout;
93 if (flagset && !mincnt || !flagset && mincnt)
99 static int _isFull(Tree *B, Nptr n)
101 int flagset = ((n->flags & isFULL) == isFULL);
102 int maxcnt = numentries(n) == getfanout(B);
107 if (flagset && !maxcnt || !flagset && maxcnt)
112 #endif /* DEBUG_BTREE */
114 /***********************************************************************\
115 | B+tree Initialization and Cleanup Routines |
116 \***********************************************************************/
118 /******************** Set up B+tree structure **********************/
119 Tree *initBtree(unsigned int poolsz, unsigned int fanout, KeyCmp keyCmp)
123 dataT data = {0,0,0,0};
125 if (fanout > MAX_FANOUT)
128 setbplustree(B, malloc(sizeof(Tree)));
129 memset(B, 0, sizeof(Tree));
130 setfanout(B, fanout);
131 setminfanout(B, (fanout + 1) >> 1);
132 initFreeNodePool(B, poolsz);
134 setleaf(B, getFreeNode(B)); /* set up the first leaf node */
135 setroot(B, getleaf(B)); /* the root is initially the leaf */
136 setflag(getroot(B), isLEAF);
137 setflag(getroot(B), isROOT);
138 setflag(getroot(B), FEWEST);
143 setcomparekeys(B, keyCmp);
146 sprintf(B->message, "INIT: B+tree of fanout %d at %10p.\n", fanout, (void *)B);
147 OutputDebugString(B->message);
153 /******************** Clean up B+tree structure ********************/
155 * dirlock must be write locked
157 void freeBtree(Tree *B)
160 sprintf(B->message, "FREE: B+tree at %10p.\n", (void *) B);
161 OutputDebugString(B->message);
166 memset(B, 0, sizeof(*B));
171 /***********************************************************************\
172 | Find leaf node in which data nodes can be found |
173 \***********************************************************************/
175 /********************** top level lookup **********************/
176 Nptr bplus_Lookup(Tree *B, keyT key)
181 sprintf(B->message, "LOOKUP: key %s.\n", key.name);
182 OutputDebugString(B->message);
185 setfunkey(B, key); /* set search key */
186 leafNode = descendToLeaf(B, getroot(B)); /* start search from root node */
194 slot = getSlot(B, leafNode);
198 dataNode = getnode(leafNode, slot);
199 data = getdatavalue(dataNode);
201 sprintf(B->message, "LOOKUP: %s found on page %d value (%d.%d.%d).\n",
203 getnodenumber(B, leafNode),
208 sprintf(B->message, "LOOKUP: not found!\n");
209 OutputDebugString(B->message);
215 /********************** `recurse' down B+tree **********************/
216 static Nptr descendToLeaf(Tree *B, Nptr curr)
223 memset(prev, 0, sizeof(prev));
225 for (depth = 0, slot = getSlot(B, curr); (slot >= 0) && isinternal(curr); depth++, slot = getSlot(B, curr)) {
228 curr = getfirstnode(curr);
230 curr = getnode(curr, slot);
231 else /* BTERROR, BTLOWER, BTUPPER */ {
240 if ((slot > 0) && !comparekeys(B)(getfunkey(B), getkey(curr, slot), 0))
241 findNode = curr; /* correct key value found */
243 findNode = NONODE; /* key value not in tree */
248 /******************** find slot for search key *********************/
249 static int getSlot(Tree *B, Nptr curr)
253 entries = numentries(curr); /* need this if root is ever empty */
254 slot = !entries ? 0 : findKey(B, curr, 1, entries);
260 /******************** recursive binary search **********************/
261 static int findKey(Tree *B, Nptr curr, int lo, int hi)
263 int mid, findslot = BTERROR;
266 findslot = bestMatch(B, curr, lo); /* recursion base case */
269 if (findslot == BTERROR) {
270 sprintf(B->message, "FINDKEY: (lo %d hi %d) Bad key ordering on node %d (0x%p)\n",
271 lo, hi, getnodenumber(B, curr), curr);
272 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
276 mid = (lo + hi) >> 1;
277 switch (findslot = bestMatch(B, curr, mid)) {
278 case BTLOWER: /* check lower half of range */
279 findslot = findKey(B, curr, lo, mid - 1); /* never in 2-3+trees */
281 case BTUPPER: /* check upper half of range */
282 findslot = findKey(B, curr, mid + 1, hi);
285 sprintf(B->message, "FINDKEY: (lo %d hi %d) Bad key ordering on node %d (0x%p)\n",
286 lo, hi, getnodenumber(B, curr), curr);
287 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
291 if (isleaf(curr) && findslot == 0)
293 sprintf(B->message, "FINDKEY: (lo %d hi %d) findslot %d is invalid for leaf nodes, bad key ordering on node %d (0x%p)\n",
294 lo, hi, findslot, getnodenumber(B, curr), curr);
295 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
301 /************ comparison of key with a target key slot *************/
302 static int bestMatch(Tree *B, Nptr curr, int slot)
304 int diff, comp=2, findslot;
306 diff = comparekeys(B)(getfunkey(B), getkey(curr, slot), 0);
309 } else if (diff < 0) { /* also check previous slot */
312 findslot = BTLOWER; /* not found in the tree */
316 else if ((comp = comparekeys(B)(getfunkey(B), getkey(curr, slot - 1), 0)) >= 0) {
318 } else if (comp < diff) {
319 findslot = BTERROR; /* inconsistent ordering of keys */
324 findslot = BTLOWER; /* key must be below in node ordering */
326 } else { /* or check following slot */
327 if (slot == numentries(curr)) {
328 if (isleaf(curr) && numentries(curr) == getfanout(B))
332 } else if ((comp = comparekeys(B)(getfunkey(B), getkey(curr, slot + 1), 0)) < 0) {
334 } else if (comp == 0) {
336 } else if (comp > diff) {
337 findslot = BTERROR; /* inconsistent ordering of keys */
342 findslot = BTUPPER; /* key must be above in node ordering */
346 if (findslot == BTERROR || isleaf(curr) && findslot == 0)
348 sprintf(B->message, "BESTMATCH: node %d (0x%p) slot %d diff %d comp %d findslot %d\n",
349 getnodenumber(B, curr), curr, slot, diff, comp, findslot);
350 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
356 /***********************************************************************\
357 | Insert new data into tree |
358 \***********************************************************************/
361 /********************** top level insert call **********************/
362 void insert(Tree *B, keyT key, dataT data)
367 sprintf(B->message, "INSERT: key %s.\n", key.name);
368 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
371 setfunkey(B, key); /* set insertion key */
372 setfundata(B, data); /* a node containing data */
373 setsplitpath(B, NONODE);
374 newNode = descendSplit(B, getroot(B)); /* insertion point search from root */
375 if (newNode != getsplitpath(B)) /* indicates the root node has split */
376 makeNewRoot(B, getroot(B), newNode);
380 /***************** recurse down and split back up ******************/
382 descendSplit(Tree *B, Nptr curr)
384 Nptr downNode = NONODE, sibling = NONODE;
392 setsplitpath(B, NONODE);
393 else if (getsplitpath(B) == NONODE)
394 setsplitpath(B, curr); /* indicates where nodes must split */
396 slot = getSlot(B, curr); /* is null only if the root is empty */
403 else if (slot == BTUPPER)
407 if (isinternal(curr)) { /* continue recursion to leaves */
409 downNode = descendSplit(B, getfirstnode(curr));
411 downNode = descendSplit(B, getnode(curr, slot));
412 } else if ((slot > 0) && !comparekeys(B)(getfunkey(B), getkey(curr, slot), 0)) {
413 if (!(gettreeflags(B) & TREE_FLAG_UNIQUE_KEYS)) {
414 downNode = getDataNode(B, getfunkey(B), getfundata(B));
415 getdatanext(downNode) = getnode(curr,slot);
416 setnode(curr, slot, downNode);
419 setsplitpath(B, NONODE);
422 downNode = getDataNode(B, getfunkey(B), getfundata(B)); /* an insertion takes place */
424 if (downNode != NONODE) { /* insert only where necessary */
425 if (getsplitpath(B) != NONODE)
426 sibling = split(B, curr); /* a sibling node is prepared */
427 insertEntry(B, curr, slot, sibling, downNode);
433 /*************** determine location of inserted key ****************/
435 insertEntry(Tree *B, Nptr currNode, int slot, Nptr sibling, Nptr downPtr)
437 int split, i, j, k, x, y;
441 sprintf(B->message, "INSERT: slot %d, down node %d.\n", slot, getnodenumber(B, downPtr));
442 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
445 if (sibling == NONODE) { /* no split occurred */
446 placeEntry(B, currNode, slot + 1, downPtr);
448 else { /* split entries between the two */
449 if isinternal(currNode) {
451 split = getfanout(B) - getminfanout(B, currNode);
452 } else if (isroot(currNode)) {
453 /* split the root node and turn it into just a leaf */
455 split = getminfanout(B, currNode);
458 split = getminfanout(B, currNode);
460 j = (slot != split ? 1 : 0);
461 k = (slot >= split ? 1 : 0);
464 * Move entries from the top half of the current node to
465 * to the sibling node.
466 * The number of entries to move is dependent upon where
467 * the new entry is going to be added in relationship to
468 * the split slot. (slots are 1-based). If order to produce
469 * a balanced tree, if the insertion slot is greater than
470 * the split we move one less entry as the new entry will
471 * be inserted into the sibling.
473 * If the node that is being split is an internal node (i != 0)
474 * then we move one less entry due to the extra down pointer
475 * when the split slot is not equal to the insertion slot
477 for (x = split + k + j * i, y = 1; x <= getfanout(B); x++, y++) {
478 xferentry(currNode, x, sibling, y); /* copy entries to sibling */
479 clrentry(currNode, x);
480 decentries(currNode);
484 if (getkey(sibling, numentries(sibling)).name == NULL)
488 clrflag(currNode, isFULL);
489 if (numentries(currNode) == getminfanout(B, currNode))
490 setflag(currNode, FEWEST); /* never happens in even size nodes */
493 if (numentries(sibling) > getfanout(B))
496 if (numentries(sibling) == getfanout(B))
497 setflag(sibling, isFULL); /* only ever happens in 2-3+trees */
499 if (numentries(sibling) > getminfanout(B, sibling))
500 clrflag(sibling, FEWEST);
502 if (i) { /* set first pointer of internal node */
504 setfirstnode(sibling, getnode(currNode, split + k));
505 decentries(currNode);
506 if (numentries(currNode) == getminfanout(B, currNode))
507 setflag(currNode, FEWEST);
509 clrflag(currNode, FEWEST);
512 setfirstnode(sibling, downPtr);
515 if (j) { /* insert new entry into correct spot */
517 placeEntry(B, sibling, slot - split + 1 - i, downPtr);
519 placeEntry(B, currNode, slot + 1, downPtr);
521 /* set key separating nodes */
523 key = getkey(sibling, 1);
525 Nptr leaf = getfirstnode(sibling);
526 while ( isinternal(leaf) )
527 leaf = getfirstnode(leaf);
528 key = getkey(leaf, 1);
533 placeEntry(B, sibling, 1, downPtr);
537 /************ place key into appropriate node & slot ***************/
539 placeEntry(Tree *B, Nptr node, int slot, Nptr downPtr)
549 if (numentries(node) != 0 && getkey(node, numentries(node)).name == NULL)
552 for (x = numentries(node); x >= slot && x != 0; x--) /* make room for new entry */
553 pushentry(node, x, 1);
554 setentry(node, slot, getfunkey(B), downPtr);/* place it in correct slot */
556 incentries(node); /* adjust entry counter */
558 if (getkey(node, numentries(node)).name == NULL)
562 if (numentries(node) == getfanout(B))
563 setflag(node, isFULL);
564 if (numentries(node) > getminfanout(B, node))
565 clrflag(node, FEWEST);
567 setflag(node, FEWEST);
571 /***************** split full node and set flags *******************/
573 split(Tree *B, Nptr node)
577 sibling = getFreeNode(B);
579 setflag(sibling, FEWEST); /* set up node flags */
582 setflag(sibling, isLEAF);
583 setnextnode(sibling, getnextnode(node));/* adjust leaf pointers */
584 setnextnode(node, sibling);
586 if (getsplitpath(B) == node)
587 setsplitpath(B, NONODE); /* no more splitting needed */
590 clrflag(node, isROOT);
596 /********************** build new root node ************************/
598 makeNewRoot(Tree *B, Nptr oldRoot, Nptr newNode)
600 setroot(B, getFreeNode(B));
602 setfirstnode(getroot(B), oldRoot); /* old root becomes new root's child */
603 setentry(getroot(B), 1, getfunkey(B), newNode); /* old root's sibling also */
604 incentries(getroot(B));
606 if (numentries(getroot(B)) > getfanout(B))
610 /* the oldRoot's isROOT flag was cleared in split() */
611 setflag(getroot(B), isROOT);
612 setflag(getroot(B), FEWEST);
613 clrflag(getroot(B), isLEAF);
618 /***********************************************************************\
619 | Delete data from tree |
620 \***********************************************************************/
622 /********************** top level delete call **********************\
624 | The recursive call for deletion carries 5 additional parameters
625 | which may be needed to rebalance the B+tree when removing the key.
626 | These parameters are:
627 | 1. immediate left neighbor of the current node
628 | 2. immediate right neighbor of the current node
629 | 3. the anchor of the current node and left neighbor
630 | 4. the anchor of the current node and right neighbor
631 | 5. the parent of the current node
633 | All of these parameters are simple to calculate going along the
634 | recursive path to the leaf nodes and the point of key deletion.
635 | At that time, the algorithm determines which node manipulations
636 | are most efficient, that is, cause the least rearranging of data,
637 | and minimize the need for non-local key manipulation.
639 \***********************************************************************/
640 void delete(Tree *B, keyT key)
645 sprintf(B->message, "DELETE: key %s.\n", key.name);
646 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
649 setfunkey(B, key); /* set deletion key */
650 setmergepath(B, NONODE);
651 newNode = descendBalance(B, getroot(B), NONODE, NONODE, NONODE, NONODE, NONODE);
652 if (isnode(newNode)) {
654 sprintf(B->message, "DELETE: collapsing node %d", getnodenumber(B, newNode));
655 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
657 collapseRoot(B, getroot(B), newNode); /* remove root when superfluous */
662 /********************** remove old root node ***********************/
664 collapseRoot(Tree *B, Nptr oldRoot, Nptr newRoot)
668 sprintf(B->message, "COLLAPSE: old %d, new %d.\n", getnodenumber(B, oldRoot), getnodenumber(B, newRoot));
669 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
670 showNode(B, "collapseRoot oldRoot", oldRoot);
671 showNode(B, "collapseRoot newRoot", newRoot);
675 setflag(newRoot, isROOT);
676 clrflag(newRoot, FEWEST);
677 putFreeNode(B, oldRoot);
678 dectreeheight(B); /* the height of the tree decreases */
682 /**************** recurse down and balance back up *****************/
684 descendBalance(Tree *B, Nptr curr, Nptr left, Nptr right, Nptr lAnc, Nptr rAnc, Nptr parent)
686 Nptr newMe=NONODE, myLeft=NONODE, myRight=NONODE, lAnchor=NONODE, rAnchor=NONODE, newNode=NONODE;
687 int slot = 0, notleft = 0, notright = 0, fewleft = 0, fewright = 0, test = 0;
690 sprintf(B->message, "descendBalance curr %d, left %d, right %d, lAnc %d, rAnc %d, parent %d\n",
691 curr ? getnodenumber(B, curr) : -1,
692 left ? getnodenumber(B, left) : -1,
693 right ? getnodenumber(B, right) : -1,
694 lAnc ? getnodenumber(B, lAnc) : -1,
695 rAnc ? getnodenumber(B, rAnc) : -1,
696 parent ? getnodenumber(B, parent) : -1);
697 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
701 setmergepath(B,NONODE);
702 else if (getmergepath(B) == NONODE)
703 setmergepath(B, curr); /* mark which nodes may need rebalancing */
705 slot = getSlot(B, curr);
712 else if (slot == BTUPPER)
716 if (isinternal(curr)) /* set up next recursion call's parameters */
719 newNode = getfirstnode(curr);
720 myLeft = !isnode(left) ? NONODE : getlastnode(left);
724 newNode = getnode(curr, slot);
726 myLeft = getfirstnode(curr);
728 myLeft = getnode(curr, slot - 1);
732 if (slot == numentries(curr)) {
733 myRight = !isnode(right) ? NONODE : getfirstnode(right);
737 myRight = getnode(curr, slot + 1);
740 newMe = descendBalance(B, newNode, myLeft, myRight, lAnchor, rAnchor, curr);
742 else if ((slot > 0) && !comparekeys(B)(getfunkey(B), getkey(curr, slot), 0))
748 newNode = getnode(curr, slot);
749 next = getdatanext(newNode);
752 * We only delete exact matches.
754 if (!comparekeys(B)(getfunkey(B), getdatakey(newNode), EXACT_MATCH)) {
755 /* exact match, free the first entry */
756 setnode(curr, slot, next);
758 if (next == NONODE) {
759 /* delete this key as there are no more data values */
762 /* otherwise, there are more and we leave the key in place */
763 setnode(curr, slot, next);
764 putFreeNode(B, newNode);
766 /* but do not delete the key */
768 setmergepath(B, NONODE);
770 } else if (next == NONODE) {
772 * we didn't find an exact match and there are no more
773 * choices. so we leave it alone and remove nothing.
776 setmergepath(B, NONODE);
778 /* The first data node doesn't match but there are other
779 * options. So we must determine if any of the next nodes
780 * are the one we are looking for.
785 if (!comparekeys(B)(getfunkey(B), getdatakey(next), EXACT_MATCH)) {
786 /* we found the one to delete */
787 getdatanext(prev) = getdatanext(next);
788 putFreeNode(B, next);
792 next = getdatanext(next);
795 /* do not delete the key */
797 setmergepath(B, NONODE);
802 newMe = NONODE; /* no deletion possible, key not found */
803 setmergepath(B, NONODE);
806 /***************** rebalancing tree after deletion *****************\
808 | The simplest B+tree rebalancing consists of the following rules.
810 | If a node underflows:
811 | CASE 1 check if it is the root, and collapse it if it is,
812 | CASE 2 otherwise, check if both of its neighbors are minimum
813 | sized and merge the underflowing node with one of them,
814 | CASE 3 otherwise shift surplus entries to the underflowing node.
816 | The choice of which neighbor to use is optional. However, the
817 | rebalancing rules that follow also ensure whenever possible
818 | that the merges and shifts which do occur use a neighbor whose
819 | anchor is the parent of the underflowing node.
821 | Cases 3, 4, 5 below are more an optimization than a requirement,
822 | and can be omitted, with a change of the action value in case 6,
823 | which actually corresponds to the third case described above.
825 \***********************************************************************/
827 /* begin deletion, working upwards from leaves */
829 if (newMe != NONODE) { /* this node removal doesn't consider duplicates */
831 sprintf(B->message, "descendBalance DELETE: slot %d, node %d.\n", slot, getnodenumber(B, curr));
832 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
835 removeEntry(B, curr, slot + (newMe != newNode)); /* removes one of two */
838 showNode(B, "descendBalance curr", curr);
842 if (getmergepath(B) == NONODE)
844 else { /* tree rebalancing rules for node merges and shifts */
845 notleft = !isnode(left);
846 notright = !isnode(right);
848 fewleft = isfew(left); /* only used when defined */
850 fewright = isfew(right);
852 /* CASE 1: prepare root node (curr) for removal */
853 if (notleft && notright) {
854 test = isleaf(curr); /* check if B+tree has become empty */
855 newNode = test ? NONODE : getfirstnode(curr);
857 /* CASE 2: the merging of two nodes is a must */
858 else if ((notleft || fewleft) && (notright || fewright)) {
859 test = (lAnc != parent);
860 newNode = test ? merge(B, curr, right, rAnc) : merge(B, left, curr, lAnc);
862 /* CASE 3: choose the better of a merge or a shift */
863 else if (!notleft && fewleft && !notright && !fewright) {
864 test = (rAnc != parent) && (curr == getmergepath(B));
865 newNode = test ? merge(B, left, curr, lAnc) : shift(B, curr, right, rAnc);
867 /* CASE 4: also choose between a merge or a shift */
868 else if (!notleft && !fewleft && !notright && fewright) {
869 test = !(lAnc == parent) && (curr == getmergepath(B));
870 newNode = test ? merge(B, curr, right, rAnc) : shift(B, left, curr, lAnc);
872 /* CASE 5: choose the more effective of two shifts */
873 else if (lAnc == rAnc) { /* => both anchors are the parent */
874 test = (numentries(left) <= numentries(right));
875 newNode = test ? shift(B, curr, right, rAnc) : shift(B, left, curr, lAnc);
877 /* CASE 6: choose the shift with more local effect */
878 else { /* if omitting cases 3,4,5 use below */
879 test = (lAnc == parent); /* test = (!notleft && !fewleft); */
880 newNode = test ? shift(B, left, curr, lAnc) : shift(B, curr, right, rAnc);
885 sprintf(B->message, "descendBalance returns %d\n", getnodenumber(B, newNode));
886 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
892 /**************** remove key and pointer from node *****************/
894 removeEntry(Tree *B, Nptr curr, int slot)
898 putFreeNode(B, getnode(curr, slot)); /* return deleted node to free list */
899 for (x = slot; x < numentries(curr); x++)
900 pullentry(curr, x, 1); /* adjust node with removed key */
902 clrflag(curr, isFULL); /* keep flag information up to date */
903 if (numentries(curr) > getminfanout(B, curr))
904 clrflag(curr, FEWEST);
906 setflag(curr, FEWEST);
910 /******* merge a node pair & set emptied node up for removal *******/
912 merge(Tree *B, Nptr left, Nptr right, Nptr anchor)
917 sprintf(B->message, "MERGE: left %d, right %d.\n", getnodenumber(B, left), getnodenumber(B, right));
918 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
919 showNode(B, "pre-merge anchor", anchor);
920 showNode(B, "pre-merge left", left);
921 showNode(B, "pre-merge right", right);
924 if (isinternal(left)) {
925 incentries(left); /* copy key separating the nodes */
927 if (numentries(left) > getfanout(B))
930 setfunkey(B, getkey(right, 1)); /* defined but maybe just deleted */
931 z = getSlot(B, anchor); /* needs the just calculated key */
934 setfunkey(B, getkey(anchor, z)); /* set slot to delete in anchor */
935 setentry(left, numentries(left), getfunkey(B), getfirstnode(right));
938 setnextnode(left, getnextnode(right));
940 for (x = numentries(left) + 1, y = 1; y <= numentries(right); x++, y++) {
943 if (numentries(left) > getfanout(B))
946 xferentry(right, y, left, x); /* transfer entries to left node */
950 if (numentries(left) > getminfanout(B, left))
951 clrflag(left, FEWEST);
952 if (numentries(left) == getfanout(B))
953 setflag(left, isFULL); /* never happens in even size nodes */
955 if (getmergepath(B) == left || getmergepath(B) == right)
956 setmergepath(B, NONODE); /* indicate rebalancing is complete */
959 showNode(B, "post-merge anchor", anchor);
960 showNode(B, "post-merge left", left);
961 showNode(B, "post-merge right", right);
967 /****** shift entries in a node pair & adjust anchor key value *****/
969 shift(Tree *B, Nptr left, Nptr right, Nptr anchor)
974 sprintf(B->message, "SHIFT: left %d, right %d, anchor %d.\n",
975 getnodenumber(B, left),
976 getnodenumber(B, right),
977 getnodenumber(B, anchor));
978 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
979 showNode(B, "pre-shift anchor", anchor);
980 showNode(B, "pre-shift left", left);
981 showNode(B, "pre-shift right", right);
984 i = isinternal(left);
986 if (numentries(left) < numentries(right)) { /* shift entries to left */
987 y = (numentries(right) - numentries(left)) >> 1;
988 x = numentries(left) + y;
989 setfunkey(B, getkey(right, y + 1 - i)); /* set new anchor key value */
990 z = getSlot(B, anchor); /* find slot in anchor node */
994 if (z == 0 && !isroot(anchor))
997 if (i) { /* move out old anchor value */
998 decentries(right); /* adjust for shifting anchor */
1001 if (numentries(left) > getfanout(B))
1004 setentry(left, numentries(left), getkey(anchor, z), getfirstnode(right));
1005 setfirstnode(right, getnode(right, y + 1 - i));
1007 clrflag(right, isFULL);
1008 setkey(anchor, z, getfunkey(B)); /* set new anchor value */
1009 for (z = y, y -= i; y > 0; y--, x--) {
1010 decentries(right); /* adjust entry count */
1013 if (numentries(left) > getfanout(B))
1016 xferentry(right, y, left, x); /* transfer entries over */
1019 for (x = 1; x <= numentries(right); x++) /* adjust reduced node */
1020 pullentry(right, x, z);
1022 else if (numentries(left) > numentries(right)) { /* shift entries to right */
1023 y = (numentries(left) - numentries(right)) >> 1;
1024 x = numentries(left) - y + 1;
1026 for (z = numentries(right); z > 0; z--) /* adjust increased node */
1027 pushentry(right, z, y);
1029 setfunkey(B, getkey(left, x)); /* set new anchor key value */
1030 z = getSlot(B, anchor);
1039 if (numentries(right) > getfanout(B))
1042 setentry(right, y, getkey(anchor, z), getfirstnode(right));
1043 setfirstnode(right, getnode(left, x));
1045 clrflag(left, isFULL);
1046 setkey(anchor, z, getfunkey(B));
1047 for (x = numentries(left) + i, y -= i; y > 0; y--, x--) {
1051 if (numentries(right) > getfanout(B))
1054 xferentry(left, x, right, y); /* transfer entries over */
1062 #endif /* DEBUG_BTREE */
1064 if (numentries(left) > getminfanout(B, left)) /* adjust node flags */
1065 clrflag(left, FEWEST); /* never happens in 2-3+trees */
1067 setflag(left, FEWEST);
1068 if (numentries(right) > getminfanout(B, right))
1069 clrflag(right, FEWEST); /* never happens in 2-3+trees */
1071 setflag(right, FEWEST);
1072 setmergepath(B, NONODE);
1075 sprintf(B->message, "SHIFT: left %d, right %d.\n", getnodenumber(B, left), getnodenumber(B, right));
1076 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1077 showNode(B, "post-shift anchor", anchor);
1078 showNode(B, "post-shift left", left);
1079 showNode(B, "post-shift right", right);
1087 _clrentry(Nptr node, int entry)
1089 if (getkey(node,entry).name != NULL) {
1090 free(getkey(node,entry).name);
1091 getkey(node,entry).name = NULL;
1093 getnode(node,entry) = NONODE;
1097 _pushentry(Nptr node, int entry, int offset)
1099 if (getkey(node,entry + offset).name != NULL)
1100 free(getkey(node,entry + offset).name);
1105 getkey(node,entry + offset).name = strdup(getkey(node,entry).name);
1107 if ( getnode(node, entry) == NONODE )
1110 getnode(node,entry + offset) = getnode(node,entry);
1114 _pullentry(Nptr node, int entry, int offset)
1116 if (getkey(node,entry).name != NULL)
1117 free(getkey(node,entry).name);
1118 getkey(node,entry).name = strdup(getkey(node,entry + offset).name);
1120 if ( getnode(node, entry + offset) == NONODE )
1123 getnode(node,entry) = getnode(node,entry + offset);
1127 _xferentry(Nptr srcNode, int srcEntry, Nptr destNode, int destEntry)
1129 if (getkey(destNode,destEntry).name != NULL)
1130 free(getkey(destNode,destEntry).name);
1131 getkey(destNode,destEntry).name = strdup(getkey(srcNode,srcEntry).name);
1133 if ( getnode(srcNode, srcEntry) == NONODE )
1136 getnode(destNode,destEntry) = getnode(srcNode,srcEntry);
1140 _setentry(Nptr node, int entry, keyT key, Nptr downNode)
1142 if (getkey(node,entry).name != NULL)
1143 free(getkey(node,entry).name);
1144 getkey(node,entry).name = strdup(key.name);
1146 if ( downNode == NONODE )
1149 getnode(node,entry) = downNode;
1153 /***********************************************************************\
1154 | Empty Node Utilities |
1155 \***********************************************************************/
1157 /********************* Set up initial pool of free nodes ***********/
1159 initFreeNodePool(Tree *B, int quantity)
1164 setfirstallnode(B, NONODE);
1165 setfirstfreenode(B, NONODE);
1167 for (i = 0, p = NONODE; i < quantity; i++) {
1168 n = malloc(sizeof(*n));
1169 memset(n, 0, sizeof(*n));
1170 setnodenumber(B,n,i);
1173 setnextnode(p, n); /* insert node into free node list */
1176 setfirstfreenode(B, n);
1177 setfirstallnode(B, n);
1181 setnextnode(p, NONODE); /* indicates end of free node list */
1182 setallnode(p, NONODE); /* indicates end of all node list */
1184 setpoolsize(B, quantity);
1188 /******************* Cleanup Free Node Pool **************************/
1191 cleanupNodePool(Tree *B)
1196 for ( i=0, node = getfirstallnode(B); node != NONODE && i<getpoolsize(B); node = next, i++ ) {
1198 if ( getdatakey(node).name ) {
1199 free(getdatakey(node).name);
1200 getdatakey(node).name = NULL;
1202 if ( getdatavalue(node).longname ) {
1203 free(getdatavalue(node).longname);
1204 getdatavalue(node).longname = NULL;
1206 } else { /* data node */
1207 for ( j=1; j<=getfanout(B); j++ ) {
1208 if (getkey(node, j).name)
1209 free(getkey(node, j).name);
1212 next = getallnode(node);
1217 /************** take a free B+tree node from the pool **************/
1219 getFreeNode(Tree *B)
1221 Nptr newNode = getfirstfreenode(B);
1223 if (newNode != NONODE) {
1224 setfirstfreenode(B, getnextnode(newNode)); /* adjust free node list */
1225 setnextnode(newNode, NONODE); /* remove node from list */
1228 newNode = malloc(sizeof(*newNode));
1229 memset(newNode, 0, sizeof(*newNode));
1231 setallnode(newNode, getfirstallnode(B));
1232 setfirstallnode(B, newNode);
1233 setnodenumber(B, newNode, getpoolsize(B));
1234 setpoolsize(B, getpoolsize(B) + 1);
1237 clearflags(newNode); /* Sets MAGIC */
1242 /************* return a deleted B+tree node to the pool ************/
1244 putFreeNode(Tree *B, Nptr node)
1252 if ( getdatakey(node).name )
1253 free(getdatakey(node).name);
1254 } else { /* data node */
1255 for ( i=1; i<=getfanout(B); i++ ) {
1256 if (getkey(node, i).name)
1257 free(getkey(node, i).name);
1261 /* free nodes do not have MAGIC set */
1262 memset(&nAdr(node), 0, sizeof(nAdr(node)));
1263 setnextnode(node, getfirstfreenode(B)); /* add node to list */
1264 setfirstfreenode(B, node); /* set it to be list head */
1268 /******* fill a free data node with a key and associated data ******/
1270 getDataNode(Tree *B, keyT key, dataT data)
1272 Nptr newNode = getFreeNode(B);
1274 setflag(newNode, isDATA);
1275 getdatakey(newNode).name = strdup(key.name);
1276 getdatavalue(newNode) = data;
1277 getdatanext(newNode) = NONODE;
1284 /***********************************************************************\
1285 | B+tree Printing Utilities |
1286 \***********************************************************************/
1288 /*********************** B+tree node printer ***********************/
1289 void showNode(Tree *B, const char * where, Nptr n)
1293 sprintf(B->message, "- -- -- -- -- -- -- -- -- -- -- -- -\n");
1294 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1295 sprintf(B->message, "| %-20s |\n", where);
1296 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1297 sprintf(B->message, "| node %6d ", getnodenumber(B, n));
1298 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1299 sprintf(B->message, " magic %4x |\n", getmagic(n));
1300 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1301 sprintf(B->message, "- -- -- -- -- -- -- -- -- -- -- -- -\n");
1302 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1303 sprintf(B->message, "| flags %1d%1d%1d%1d ", isfew(n), isfull(n), isroot(n), isleaf(n));
1304 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1305 sprintf(B->message, "| keys = %5d ", numentries(n));
1306 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1307 sprintf(B->message, "| node = %6d |\n", getnodenumber(B, getfirstnode(n)));
1308 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1309 for (x = 1; x <= numentries(n); x++) {
1310 sprintf(B->message, "| entry %6d ", x);
1311 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1312 sprintf(B->message, "| key = %6s ", getkey(n, x).name);
1313 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1314 sprintf(B->message, "| node = %6d |\n", getnodenumber(B, getnode(n, x)));
1315 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1317 sprintf(B->message, "- -- -- -- -- -- -- -- -- -- -- -- -\n");
1318 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1321 /****************** B+tree class variable printer ******************/
1322 void showBtree(Tree *B)
1324 sprintf(B->message, "- -- -- -- -- -- -\n");
1325 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1326 sprintf(B->message, "| B+tree %10p |\n", (void *) B);
1327 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1328 sprintf(B->message, "- -- -- -- -- -- -\n");
1329 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1330 sprintf(B->message, "| root %6d |\n", getnodenumber(B, getroot(B)));
1331 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1332 sprintf(B->message, "| leaf %6d |\n", getnodenumber(B, getleaf(B)));
1333 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1334 sprintf(B->message, "| fanout %3d |\n", getfanout(B) + 1);
1335 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1336 sprintf(B->message, "| minfanout %3d |\n", getminfanout(B, getroot(B)) + 1);
1337 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1338 sprintf(B->message, "| height %3d |\n", gettreeheight(B));
1339 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1340 sprintf(B->message, "| freenode %6d |\n", getnodenumber(B, getfirstfreenode(B)));
1341 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1342 sprintf(B->message, "| theKey %6s |\n", getfunkey(B).name);
1343 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1344 sprintf(B->message, "| theData %d.%d.%d |\n", getfundata(B).volume,
1345 getfundata(B).vnode, getfundata(B).unique);
1346 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1347 sprintf(B->message, "- -- -- -- -- -- -\n");
1348 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1352 listBtreeNodes(Tree *B, const char * parent_desc, Nptr node)
1358 if (isntnode(node)) {
1359 sprintf(B->message, "%s - NoNode!!!\n");
1360 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1366 data = getdatavalue(node);
1367 sprintf(B->message, "%s - data node %d (%d.%d.%d)\n",
1368 parent_desc, getnodenumber(B, node),
1369 data.fid.volume, data.fid.vnode, data.fid.unique);
1370 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1373 showNode(B, parent_desc, node);
1375 if ( isinternal(node) || isroot(node) ) {
1376 sprintf(thisnode, "parent %6d", getnodenumber(B , node));
1378 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1379 for ( i= isinternal(node) ? 0 : 1; i <= numentries(node); i++ ) {
1380 listBtreeNodes(B, thisnode, i == 0 ? getfirstnode(node) : getnode(node, i));
1385 /*********************** B+tree data printer ***********************/
1387 listBtreeValues(Tree *B, Nptr n, int num)
1393 for (slot = 1; (n != NONODE) && num && numentries(n); num--) {
1394 if (comparekeys(B)(getkey(n, slot),prev, 0) < 0) {
1395 sprintf(B->message, "BOMB %8s\n", getkey(n, slot).name);
1396 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1399 prev = getkey(n, slot);
1400 data = getdatavalue(getnode(n, slot));
1401 sprintf(B->message, "%8s (%d.%d.%d)\n",
1402 prev.name, data.fid.volume, data.fid.vnode, data.fid.unique);
1403 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1404 if (++slot > numentries(n))
1405 n = getnextnode(n), slot = 1;
1407 sprintf(B->message, "\n\n");
1408 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1411 /******************** entire B+tree data printer *******************/
1413 listAllBtreeValues(Tree *B)
1415 listBtreeValues(B, getleaf(B), BTERROR);
1417 #endif /* DEBUG_BTREE */
1420 findAllBtreeValues(Tree *B)
1423 Nptr n = getleaf(B), l;
1427 for (slot = 1; (n != NONODE) && num && numentries(n); num--) {
1428 if (comparekeys(B)(getkey(n, slot),prev, 0) < 0) {
1429 sprintf(B->message,"BOMB %8s\n", getkey(n, slot).name);
1430 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1435 prev = getkey(n, slot);
1436 l = bplus_Lookup(B, prev);
1439 sprintf(B->message,"BOMB %8s cannot be found\n", prev.name);
1441 sprintf(B->message,"BOMB lookup(%8s) finds wrong node\n", prev.name);
1442 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1448 if (++slot > numentries(n))
1449 n = getnextnode(n), slot = 1;
1454 * the return must be -1, 0, or 1. stricmp() in MSVC 8.0
1455 * does not return only those values.
1457 * the sorting of the tree is by case insensitive sort order
1458 * therefore, unless the strings actually match via a case
1459 * insensitive search do we want to perform the case sensitive
1460 * match. Otherwise, the search order might be considered
1461 * to be inconsistent when the EXACT_MATCH flag is set.
1464 compareKeys(keyT key1, keyT key2, int flags)
1468 comp = stricmp(key1.name, key2.name);
1469 if (comp == 0 && (flags & EXACT_MATCH))
1470 comp = strcmp(key1.name, key2.name);
1471 return (comp < 0 ? -1 : (comp > 0 ? 1 : 0));
1474 /* Look up a file name in directory.
1477 op->scp->dirlock is read locked
1480 op->scp->dirlock is read locked
1483 cm_BPlusDirLookup(cm_dirOp_t * op, char *entry, cm_fid_t * cfid)
1487 Nptr leafNode = NONODE;
1488 LARGE_INTEGER start, end;
1490 if (op->scp->dirBplus == NULL ||
1491 op->dataVersion != op->scp->dirDataVersion) {
1496 lock_AssertAny(&op->scp->dirlock);
1498 QueryPerformanceCounter(&start);
1500 leafNode = bplus_Lookup(op->scp->dirBplus, key);
1501 if (leafNode != NONODE) {
1503 Nptr firstDataNode, dataNode, nextDataNode;
1507 /* Found a leaf that matches the key via a case-insensitive
1508 * match. There may be one or more data nodes that match.
1509 * If we have an exact match, return that.
1510 * If we have an ambiguous match, return an error.
1511 * If we have only one inexact match, return that.
1513 slot = getSlot(op->scp->dirBplus, leafNode);
1514 if (slot <= BTERROR) {
1515 op->scp->dirDataVersion = 0;
1516 rc = (slot == BTERROR ? EINVAL : ENOENT);
1519 firstDataNode = getnode(leafNode, slot);
1521 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
1523 if (!comparekeys(op->scp->dirBplus)(key, getdatakey(dataNode), EXACT_MATCH) ) {
1527 nextDataNode = getdatanext(dataNode);
1531 *cfid = getdatavalue(dataNode).fid;
1533 bplus_lookup_hits++;
1534 } else if (count == 1) {
1535 *cfid = getdatavalue(firstDataNode).fid;
1536 rc = CM_ERROR_INEXACT_MATCH;
1537 bplus_lookup_hits_inexact++;
1539 rc = CM_ERROR_AMBIGUOUS_FILENAME;
1540 bplus_lookup_ambiguous++;
1544 bplus_lookup_misses++;
1547 QueryPerformanceCounter(&end);
1549 bplus_lookup_time += (end.QuadPart - start.QuadPart);
1558 op->scp->dirlock is write locked
1561 op->scp->dirlock is write locked
1563 long cm_BPlusDirCreateEntry(cm_dirOp_t * op, char *entry, cm_fid_t * cfid)
1568 LARGE_INTEGER start, end;
1571 if (op->scp->dirBplus == NULL ||
1572 op->dataVersion != op->scp->dirDataVersion) {
1578 lock_AssertWrite(&op->scp->dirlock);
1580 data.fid.cell = cfid->cell;
1581 data.fid.volume = cfid->volume;
1582 data.fid.vnode = cfid->vnode;
1583 data.fid.unique = cfid->unique;
1584 data.longname = NULL;
1586 QueryPerformanceCounter(&start);
1587 bplus_create_entry++;
1589 insert(op->scp->dirBplus, key, data);
1590 if (!cm_Is8Dot3(entry)) {
1592 dfid.vnode = htonl(data.fid.vnode);
1593 dfid.unique = htonl(data.fid.unique);
1595 cm_Gen8Dot3NameInt(entry, &dfid, shortName, NULL);
1597 key.name = shortName;
1598 data.longname = strdup(entry);
1599 insert(op->scp->dirBplus, key, data);
1602 QueryPerformanceCounter(&end);
1604 bplus_create_time += (end.QuadPart - start.QuadPart);
1613 op->scp->dirlock is write locked
1616 op->scp->dirlock is write locked
1618 int cm_BPlusDirDeleteEntry(cm_dirOp_t * op, char *entry)
1622 Nptr leafNode = NONODE;
1623 LARGE_INTEGER start, end;
1625 if (op->scp->dirBplus == NULL ||
1626 op->dataVersion != op->scp->dirDataVersion) {
1631 lock_AssertWrite(&op->scp->dirlock);
1633 QueryPerformanceCounter(&start);
1635 bplus_remove_entry++;
1637 if (op->scp->dirBplus) {
1638 if (!cm_Is8Dot3(entry)) {
1643 leafNode = bplus_Lookup(op->scp->dirBplus, key);
1644 if (leafNode != NONODE) {
1646 Nptr firstDataNode, dataNode, nextDataNode;
1650 /* Found a leaf that matches the key via a case-insensitive
1651 * match. There may be one or more data nodes that match.
1652 * If we have an exact match, return that.
1653 * If we have an ambiguous match, return an error.
1654 * If we have only one inexact match, return that.
1656 slot = getSlot(op->scp->dirBplus, leafNode);
1657 if (slot <= BTERROR) {
1658 op->scp->dirDataVersion = 0;
1662 firstDataNode = getnode(leafNode, slot);
1664 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
1666 if (!comparekeys(op->scp->dirBplus)(key, getdatakey(dataNode), EXACT_MATCH) ) {
1670 nextDataNode = getdatanext(dataNode);
1674 fid = getdatavalue(dataNode).fid;
1676 } else if (count == 1) {
1677 fid = getdatavalue(firstDataNode).fid;
1678 rc = CM_ERROR_INEXACT_MATCH;
1680 rc = CM_ERROR_AMBIGUOUS_FILENAME;
1685 if (rc != CM_ERROR_AMBIGUOUS_FILENAME) {
1686 dfid.vnode = htonl(fid.vnode);
1687 dfid.unique = htonl(fid.unique);
1688 cm_Gen8Dot3NameInt(entry, &dfid, shortName, NULL);
1690 /* delete first the long name and then the short name */
1691 delete(op->scp->dirBplus, key);
1692 key.name = shortName;
1693 delete(op->scp->dirBplus, key);
1696 char * longname = NULL;
1697 /* We need to lookup the 8dot3 name to determine what the
1698 * matching long name is
1700 leafNode = bplus_Lookup(op->scp->dirBplus, key);
1701 if (leafNode != NONODE) {
1703 Nptr firstDataNode, dataNode, nextDataNode;
1707 /* Found a leaf that matches the key via a case-insensitive
1708 * match. There may be one or more data nodes that match.
1709 * If we have an exact match, return that.
1710 * If we have an ambiguous match, return an error.
1711 * If we have only one inexact match, return that.
1713 slot = getSlot(op->scp->dirBplus, leafNode);
1714 if (slot <= BTERROR) {
1715 op->scp->dirDataVersion = 0;
1720 firstDataNode = getnode(leafNode, slot);
1722 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
1724 if (!comparekeys(op->scp->dirBplus)(key, getdatakey(dataNode), EXACT_MATCH) ) {
1728 nextDataNode = getdatanext(dataNode);
1732 longname = getdatavalue(dataNode).longname;
1734 } else if (count == 1) {
1735 longname = getdatavalue(firstDataNode).longname;
1736 rc = CM_ERROR_INEXACT_MATCH;
1738 rc = CM_ERROR_AMBIGUOUS_FILENAME;
1742 if (rc != CM_ERROR_AMBIGUOUS_FILENAME) {
1744 key.name = longname;
1745 delete(op->scp->dirBplus, key);
1748 delete(op->scp->dirBplus, key);
1753 QueryPerformanceCounter(&end);
1755 bplus_remove_time += (end.QuadPart - start.QuadPart);
1763 int cm_BPlusDirFoo(struct cm_scache *scp, struct cm_dirEntry *dep,
1764 void *dummy, osi_hyper_t *entryOffsetp)
1766 keyT key = {dep->name};
1770 data.fid.cell = scp->fid.cell;
1771 data.fid.volume = scp->fid.volume;
1772 data.fid.vnode = ntohl(dep->fid.vnode);
1773 data.fid.unique = ntohl(dep->fid.unique);
1774 data.longname = NULL;
1776 /* the Write lock is held in cm_BPlusDirBuildTree() */
1777 insert(scp->dirBplus, key, data);
1778 if (!cm_Is8Dot3(dep->name)) {
1780 dfid.vnode = dep->fid.vnode;
1781 dfid.unique = dep->fid.unique;
1783 cm_Gen8Dot3NameInt(dep->name, &dfid, shortName, NULL);
1785 key.name = shortName;
1786 data.longname = strdup(dep->name);
1787 insert(scp->dirBplus, key, data);
1791 findAllBtreeValues(scp->dirBplus);
1798 * scp->dirlock must be writeLocked before call
1800 * scp->mutex must not be held
1802 long cm_BPlusDirBuildTree(cm_scache_t *scp, cm_user_t *userp, cm_req_t* reqp)
1806 LARGE_INTEGER start, end;
1808 osi_assertx(scp->dirBplus == NULL, "cm_BPlusDirBuildTree called on non-empty tree");
1810 lock_AssertWrite(&scp->dirlock);
1812 QueryPerformanceCounter(&start);
1815 if (scp->dirBplus == NULL) {
1816 scp->dirBplus = initBtree(64, MAX_FANOUT, compareKeys);
1818 if (scp->dirBplus == NULL) {
1822 thyper.HighPart = 0;
1823 rc = cm_ApplyDir(scp, cm_BPlusDirFoo, NULL, &thyper, userp, reqp, NULL);
1826 QueryPerformanceCounter(&end);
1828 bplus_build_time += (end.QuadPart - start.QuadPart);
1831 cm_BPlusDirEnumTest(scp, 1);
1836 int cm_MemDumpBPlusStats(FILE *outputFile, char *cookie, int lock)
1841 sprintf(output, "%s - B+ Lookup Hits: %-8d\r\n", cookie, bplus_lookup_hits);
1842 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1843 sprintf(output, "%s - Inexact Hits: %-8d\r\n", cookie, bplus_lookup_hits_inexact);
1844 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1845 sprintf(output, "%s - Ambiguous Hits: %-8d\r\n", cookie, bplus_lookup_ambiguous);
1846 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1847 sprintf(output, "%s - Misses: %-8d\r\n", cookie, bplus_lookup_misses);
1848 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1849 sprintf(output, "%s - Create: %-8d\r\n", cookie, bplus_create_entry);
1850 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1851 sprintf(output, "%s - Remove: %-8d\r\n", cookie, bplus_remove_entry);
1852 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1853 sprintf(output, "%s - Build Tree: %-8d\r\n", cookie, bplus_build_tree);
1854 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1855 sprintf(output, "%s - Free Tree: %-8d\r\n", cookie, bplus_free_tree);
1856 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1857 sprintf(output, "%s - DV Error: %-8d\r\n", cookie, bplus_dv_error);
1858 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1860 sprintf(output, "%s - B+ Time Lookup: %-16I64d\r\n", cookie, bplus_lookup_time);
1861 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1862 sprintf(output, "%s - Create: %-16I64d\r\n", cookie, bplus_create_time);
1863 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1864 sprintf(output, "%s - Remove: %-16I64d\r\n", cookie, bplus_remove_time);
1865 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1866 sprintf(output, "%s - Build: %-16I64d\r\n", cookie, bplus_build_time);
1867 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1868 sprintf(output, "%s - Free: %-16I64d\r\n", cookie, bplus_free_time);
1869 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1874 void cm_BPlusDumpStats(void)
1876 afsi_log("B+ Lookup Hits: %-8d", bplus_lookup_hits);
1877 afsi_log(" Inexact Hits: %-8d", bplus_lookup_hits_inexact);
1878 afsi_log(" Ambiguous Hits: %-8d", bplus_lookup_ambiguous);
1879 afsi_log(" Misses: %-8d", bplus_lookup_misses);
1880 afsi_log(" Create: %-8d", bplus_create_entry);
1881 afsi_log(" Remove: %-8d", bplus_remove_entry);
1882 afsi_log(" Build Tree: %-8d", bplus_build_tree);
1883 afsi_log(" Free Tree: %-8d", bplus_free_tree);
1884 afsi_log(" DV Error: %-8d", bplus_dv_error);
1886 afsi_log("B+ Time Lookup: %-16I64d", bplus_lookup_time);
1887 afsi_log(" Create: %-16I64d", bplus_create_time);
1888 afsi_log(" Remove: %-16I64d", bplus_remove_time);
1889 afsi_log(" Build: %-16I64d", bplus_build_time);
1890 afsi_log(" Free: %-16I64d", bplus_free_time);
1893 static cm_direnum_t *
1894 cm_BPlusEnumAlloc(afs_uint32 entries)
1896 cm_direnum_t * enump;
1902 size = sizeof(cm_direnum_t)+(entries-1)*sizeof(cm_direnum_entry_t);
1903 enump = (cm_direnum_t *)malloc(size);
1904 memset(enump, 0, size);
1905 enump->count = entries;
1910 cm_BPlusDirEnumerate(cm_scache_t *scp, afs_uint32 locked,
1911 char * maskp, cm_direnum_t **enumpp)
1913 afs_uint32 count = 0, slot, numentries;
1914 Nptr leafNode = NONODE, nextLeafNode;
1915 Nptr firstDataNode, dataNode, nextDataNode;
1916 cm_direnum_t * enump;
1920 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate start");
1922 /* Read lock the bplus tree so the data can't change */
1924 lock_ObtainRead(&scp->dirlock);
1926 if (scp->dirBplus == NULL) {
1927 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate No BPlus Tree");
1931 /* Compute the number of entries */
1932 for (count = 0, leafNode = getleaf(scp->dirBplus); leafNode; leafNode = nextLeafNode) {
1934 for ( slot = 1, numentries = numentries(leafNode); slot <= numentries; slot++) {
1935 firstDataNode = getnode(leafNode, slot);
1937 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
1938 if (maskp == NULL) {
1939 /* name is in getdatakey(dataNode) */
1940 if (getdatavalue(dataNode).longname != NULL ||
1941 cm_Is8Dot3(getdatakey(dataNode).name))
1944 if (cm_Is8Dot3(getdatakey(dataNode).name) &&
1945 smb_V3MatchMask(getdatakey(dataNode).name, maskp, CM_FLAG_CASEFOLD) ||
1946 getdatavalue(dataNode).longname == NULL &&
1947 smb_V3MatchMask(getdatavalue(dataNode).longname, maskp, CM_FLAG_CASEFOLD))
1950 nextDataNode = getdatanext(dataNode);
1954 nextLeafNode = getnextnode(leafNode);
1957 sprintf(buffer, "BPlusTreeEnumerate count = %d", count);
1958 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, buffer));
1960 /* Allocate the enumeration object */
1961 enump = cm_BPlusEnumAlloc(count);
1962 if (enump == NULL) {
1963 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate Alloc failed");
1968 /* Copy the name and fid for each longname entry into the enumeration */
1969 for (count = 0, leafNode = getleaf(scp->dirBplus); leafNode; leafNode = nextLeafNode) {
1971 for ( slot = 1, numentries = numentries(leafNode); slot <= numentries; slot++) {
1972 firstDataNode = getnode(leafNode, slot);
1974 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
1979 if (maskp == NULL) {
1980 if (getdatavalue(dataNode).longname != NULL ||
1981 cm_Is8Dot3(getdatakey(dataNode).name))
1986 if (cm_Is8Dot3(getdatakey(dataNode).name) &&
1987 smb_V3MatchMask(getdatakey(dataNode).name, maskp, CM_FLAG_CASEFOLD) ||
1988 getdatavalue(dataNode).longname == NULL &&
1989 smb_V3MatchMask(getdatavalue(dataNode).longname, maskp, CM_FLAG_CASEFOLD))
1996 if (getdatavalue(dataNode).longname) {
1997 name = strdup(getdatavalue(dataNode).longname);
2000 name = strdup(getdatakey(dataNode).name);
2005 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate strdup failed");
2009 enump->entry[count].name = name;
2010 enump->entry[count].fid = getdatavalue(dataNode).fid;
2012 strncpy(enump->entry[count].shortName, getdatakey(dataNode).name,
2013 sizeof(enump->entry[count].shortName));
2015 enump->entry[count].shortName[0] = '\0';
2018 nextDataNode = getdatanext(dataNode);
2022 nextLeafNode = getnextnode(leafNode);
2027 lock_ReleaseRead(&scp->dirlock);
2029 /* if we failed, cleanup any mess */
2031 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate rc != 0");
2033 for ( count = 0; count < enump->count && enump->entry[count].name; count++ ) {
2034 free(enump->entry[count].name);
2041 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate end");
2047 cm_BPlusDirNextEnumEntry(cm_direnum_t *enump, cm_direnum_entry_t **entrypp)
2049 if (enump == NULL || entrypp == NULL || enump->next > enump->count) {
2052 osi_Log0(afsd_logp, "cm_BPlusDirNextEnumEntry invalid input");
2053 return CM_ERROR_INVAL; \
2056 *entrypp = &enump->entry[enump->next++];
2057 if ( enump->next == enump->count ) {
2058 osi_Log0(afsd_logp, "cm_BPlusDirNextEnumEntry STOPNOW");
2059 return CM_ERROR_STOPNOW;
2062 osi_Log0(afsd_logp, "cm_BPlusDirNextEnumEntry SUCCESS");
2068 cm_BPlusDirFreeEnumeration(cm_direnum_t *enump)
2072 osi_Log0(afsd_logp, "cm_BPlusDirFreeEnumeration");
2075 for ( count = 0; count < enump->count && enump->entry[count].name; count++ ) {
2076 free(enump->entry[count].name);
2084 cm_BPlusDirEnumTest(cm_scache_t * dscp, afs_uint32 locked)
2086 cm_direnum_t * enump = NULL;
2087 cm_direnum_entry_t * entryp;
2090 osi_Log0(afsd_logp, "cm_BPlusDirEnumTest start");
2092 for (code = cm_BPlusDirEnumerate(dscp, locked, NULL, &enump); code == 0; ) {
2093 code = cm_BPlusDirNextEnumEntry(enump, &entryp);
2094 if (code == 0 || code == CM_ERROR_STOPNOW) {
2097 char * type = "ScpNotFound";
2100 scp = cm_FindSCache(&entryp->fid);
2102 switch (scp->fileType) {
2103 case CM_SCACHETYPE_FILE :
2106 case CM_SCACHETYPE_DIRECTORY :
2109 case CM_SCACHETYPE_SYMLINK :
2112 case CM_SCACHETYPE_MOUNTPOINT:
2113 type = "MountPoint";
2115 case CM_SCACHETYPE_DFSLINK :
2118 case CM_SCACHETYPE_INVALID :
2126 dv = scp->dataVersion;
2127 cm_ReleaseSCache(scp);
2130 sprintf(buffer, "'%s' Fid = (%d,%d,%d,%d) Short = '%s' Type %s DV %d",
2132 entryp->fid.cell, entryp->fid.volume, entryp->fid.vnode, entryp->fid.unique,
2137 osi_Log0(afsd_logp, osi_LogSaveString(afsd_logp, buffer));
2142 cm_BPlusDirFreeEnumeration(enump);
2144 osi_Log0(afsd_logp, "cm_BPlusDirEnumTest end");
2148 #endif /* USE_BPLUS */