2 * Copyright 2007 Secure Endpoints Inc.
6 * This software has been released under the terms of the IBM Public
7 * License. For details, see the LICENSE file in the top-level source
8 * directory or online at http://www.openafs.org/dl/license10.html
10 * Thanks to Jan Jannink for B+ tree algorithms.
22 /******************* statistics globals *********************************/
23 afs_uint32 bplus_lookup_hits = 0;
24 afs_uint32 bplus_lookup_hits_inexact = 0;
25 afs_uint32 bplus_lookup_misses = 0;
26 afs_uint32 bplus_lookup_ambiguous = 0;
27 afs_uint32 bplus_create_entry = 0;
28 afs_uint32 bplus_remove_entry = 0;
29 afs_uint32 bplus_build_tree = 0;
30 afs_uint32 bplus_free_tree = 0;
31 afs_uint32 bplus_dv_error = 0;
33 afs_uint64 bplus_lookup_time = 0;
34 afs_uint64 bplus_create_time = 0;
35 afs_uint64 bplus_remove_time = 0;
36 afs_uint64 bplus_build_time = 0;
37 afs_uint64 bplus_free_time = 0;
39 /*********************** private functions *************************/
40 static void initFreeNodePool(Tree *B, int quantity);
41 static Nptr getFreeNode(Tree *B);
42 static void putFreeNode(Tree *B, Nptr self);
43 static void cleanupNodePool(Tree *B);
45 static Nptr descendToLeaf(Tree *B, Nptr curr);
46 static int getSlot(Tree *B, Nptr curr);
47 static int findKey(Tree *B, Nptr curr, int lo, int hi);
48 static int bestMatch(Tree *B, Nptr curr, int slot);
50 static Nptr getDataNode(Tree *B, keyT key, dataT data);
51 static Nptr descendSplit(Tree *B, Nptr curr);
52 static void insertEntry(Tree *B, Nptr node, int slot, Nptr sibling, Nptr downPtr);
53 static void placeEntry(Tree *B, Nptr node, int slot, Nptr downPtr);
54 static Nptr split(Tree *B, Nptr node);
55 static void makeNewRoot(Tree *B, Nptr oldRoot, Nptr newNode);
57 static Nptr descendBalance(Tree *B, Nptr curr, Nptr left, Nptr right, Nptr lAnc, Nptr rAnc, Nptr parent);
58 static void collapseRoot(Tree *B, Nptr oldRoot, Nptr newRoot);
59 static void removeEntry(Tree *B, Nptr curr, int slot);
60 static Nptr merge(Tree *B, Nptr left, Nptr right, Nptr anchor);
61 static Nptr shift(Tree *B, Nptr left, Nptr right, Nptr anchor);
63 static void _clrentry(Nptr node, int entry);
64 static void _pushentry(Nptr node, int entry, int offset);
65 static void _pullentry(Nptr node, int entry, int offset);
66 static void _xferentry(Nptr srcNode, int srcEntry, Nptr destNode, int destEntry);
67 static void _setentry(Nptr node, int entry, keyT key, Nptr downNode);
69 /* access key and data values for B+tree methods */
70 /* pass values to getSlot(), descend...() */
71 static keyT getfunkey(Tree *B);
72 static dataT getfundata(Tree *B);
73 static void setfunkey(Tree *B, keyT v);
74 static void setfundata(Tree *B, dataT v);
78 static int _isRoot(Tree *B, Nptr n)
80 int flagset = ((n->flags & isROOT) == isROOT);
85 if (flagset && n != getroot(B))
91 static int _isFew(Tree *B, Nptr n)
93 int flagset = ((n->flags & FEWEST) == FEWEST);
94 int fanout = getminfanout(B, n);
95 int entries = numentries(n);
96 int mincnt = entries <= fanout;
101 if (flagset && !mincnt || !flagset && mincnt)
107 static int _isFull(Tree *B, Nptr n)
109 int flagset = ((n->flags & isFULL) == isFULL);
110 int maxcnt = numentries(n) == getfanout(B);
115 if (flagset && !maxcnt || !flagset && maxcnt)
120 #endif /* DEBUG_BTREE */
122 /***********************************************************************\
123 | B+tree Initialization and Cleanup Routines |
124 \***********************************************************************/
125 static DWORD TlsKeyIndex;
126 static DWORD TlsDataIndex;
128 long cm_InitBPlusDir(void)
130 if ((TlsKeyIndex = TlsAlloc()) == TLS_OUT_OF_INDEXES)
133 if ((TlsDataIndex = TlsAlloc()) == TLS_OUT_OF_INDEXES)
139 /******************** Set up B+tree structure **********************/
140 Tree *initBtree(unsigned int poolsz, unsigned int fanout, KeyCmp keyCmp)
144 dataT data = {0,0,0,0};
146 if (fanout > MAX_FANOUT)
149 setbplustree(B, malloc(sizeof(Tree)));
150 memset(B, 0, sizeof(Tree));
151 setfanout(B, fanout);
152 setminfanout(B, (fanout + 1) >> 1);
153 initFreeNodePool(B, poolsz);
155 setleaf(B, getFreeNode(B)); /* set up the first leaf node */
156 setroot(B, getleaf(B)); /* the root is initially the leaf */
157 setflag(getroot(B), isLEAF);
158 setflag(getroot(B), isROOT);
159 setflag(getroot(B), FEWEST);
164 setcomparekeys(B, keyCmp);
167 sprintf(B->message, "INIT: B+tree of fanout %d at %10p.\n", fanout, (void *)B);
168 OutputDebugString(B->message);
174 /******************** Clean up B+tree structure ********************/
176 * dirlock must be write locked
178 void freeBtree(Tree *B)
181 sprintf(B->message, "FREE: B+tree at %10p.\n", (void *) B);
182 OutputDebugString(B->message);
187 memset(B, 0, sizeof(*B));
192 /* access key and data values for B+tree methods */
193 /* pass values to getSlot(), descend...() */
194 static keyT getfunkey(Tree *B) {
197 // Retrieve a data pointer for the current thread.
198 tlsKey = (keyT *) TlsGetValue(TlsKeyIndex);
199 if (tlsKey == NULL) {
200 if (GetLastError() != ERROR_SUCCESS)
201 osi_panic("TlsGetValue failed", __FILE__, __LINE__);
203 osi_panic("get before set", __FILE__, __LINE__);
209 static dataT getfundata(Tree *B) {
212 // Retrieve a data pointer for the current thread.
213 tlsData = (dataT *) TlsGetValue(TlsDataIndex);
214 if (tlsData == NULL) {
215 if (GetLastError() != ERROR_SUCCESS)
216 osi_panic("TlsGetValue failed", __FILE__, __LINE__);
218 osi_panic("get before set", __FILE__, __LINE__);
224 static void setfunkey(Tree *B, keyT theKey) {
227 tlsKey = (keyT *) TlsGetValue(TlsKeyIndex);
228 if (tlsKey == NULL) {
229 if (GetLastError() != ERROR_SUCCESS)
230 osi_panic("TlsGetValue failed", __FILE__, __LINE__);
232 tlsKey = malloc(sizeof(keyT));
234 if (!TlsSetValue(TlsKeyIndex, tlsKey))
235 osi_panic("TlsSetValue failed", __FILE__, __LINE__);
241 static void setfundata(Tree *B, dataT theData) {
244 tlsData = (dataT *) TlsGetValue(TlsDataIndex);
245 if (tlsData == NULL) {
246 if (GetLastError() != ERROR_SUCCESS)
247 osi_panic("TlsGetValue failed", __FILE__, __LINE__);
249 tlsData = malloc(sizeof(dataT));
251 if (!TlsSetValue(TlsDataIndex, tlsData))
252 osi_panic("TlsSetValue failed", __FILE__, __LINE__);
259 /***********************************************************************\
260 | Find leaf node in which data nodes can be found |
261 \***********************************************************************/
263 /********************** top level lookup **********************/
264 Nptr bplus_Lookup(Tree *B, keyT key)
269 sprintf(B->message, "LOOKUP: key %s.\n", key.name);
270 OutputDebugString(B->message);
273 setfunkey(B, key); /* set search key */
274 leafNode = descendToLeaf(B, getroot(B)); /* start search from root node */
282 slot = getSlot(B, leafNode);
286 dataNode = getnode(leafNode, slot);
287 data = getdatavalue(dataNode);
289 sprintf(B->message, "LOOKUP: %s found on page %d value (%d.%d.%d).\n",
291 getnodenumber(B, leafNode),
296 sprintf(B->message, "LOOKUP: not found!\n");
297 OutputDebugString(B->message);
303 /********************** `recurse' down B+tree **********************/
304 static Nptr descendToLeaf(Tree *B, Nptr curr)
311 memset(prev, 0, sizeof(prev));
313 for (depth = 0, slot = getSlot(B, curr); (slot >= 0) && isinternal(curr); depth++, slot = getSlot(B, curr)) {
316 curr = getfirstnode(curr);
318 curr = getnode(curr, slot);
319 else /* BTERROR, BTLOWER, BTUPPER */ {
328 if ((slot > 0) && !comparekeys(B)(getfunkey(B), getkey(curr, slot), 0))
329 findNode = curr; /* correct key value found */
331 findNode = NONODE; /* key value not in tree */
336 /******************** find slot for search key *********************/
337 static int getSlot(Tree *B, Nptr curr)
341 entries = numentries(curr); /* need this if root is ever empty */
342 slot = !entries ? 0 : findKey(B, curr, 1, entries);
348 /******************** recursive binary search **********************/
349 static int findKey(Tree *B, Nptr curr, int lo, int hi)
351 int mid, findslot = BTERROR;
354 findslot = bestMatch(B, curr, lo); /* recursion base case */
357 if (findslot == BTERROR) {
358 sprintf(B->message, "FINDKEY: (lo %d hi %d) Bad key ordering on node %d (0x%p)\n",
359 lo, hi, getnodenumber(B, curr), curr);
360 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
364 mid = (lo + hi) >> 1;
365 switch (findslot = bestMatch(B, curr, mid)) {
366 case BTLOWER: /* check lower half of range */
368 findslot = findKey(B, curr, lo, mid - 1); /* never in 2-3+trees */
370 case BTUPPER: /* check upper half of range */
371 if (mid < getfanout(B))
372 findslot = findKey(B, curr, mid + 1, hi);
375 sprintf(B->message, "FINDKEY: (lo %d hi %d) Bad key ordering on node %d (0x%p)\n",
376 lo, hi, getnodenumber(B, curr), curr);
377 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
381 if (isleaf(curr) && findslot == 0)
383 sprintf(B->message, "FINDKEY: (lo %d hi %d) findslot %d is invalid for leaf nodes, bad key ordering on node %d (0x%p)\n",
384 lo, hi, findslot, getnodenumber(B, curr), curr);
385 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
391 /************ comparison of key with a target key slot *************/
392 static int bestMatch(Tree *B, Nptr curr, int slot)
394 int diff, comp=2, findslot;
396 diff = comparekeys(B)(getfunkey(B), getkey(curr, slot), 0);
399 } else if (diff < 0) { /* also check previous slot */
402 findslot = BTLOWER; /* not found in the tree */
406 else if ((comp = comparekeys(B)(getfunkey(B), getkey(curr, slot - 1), 0)) >= 0) {
408 } else if (comp < diff) {
409 findslot = BTERROR; /* inconsistent ordering of keys */
414 findslot = BTLOWER; /* key must be below in node ordering */
416 } else { /* or check following slot */
417 if (slot == numentries(curr)) {
418 if (isleaf(curr) && numentries(curr) == getfanout(B))
422 } else if ((comp = comparekeys(B)(getfunkey(B), getkey(curr, slot + 1), 0)) < 0) {
424 } else if (comp == 0) {
426 } else if (comp > diff) {
427 findslot = BTERROR; /* inconsistent ordering of keys */
432 findslot = BTUPPER; /* key must be above in node ordering */
436 if (findslot == BTERROR || isleaf(curr) && findslot == 0)
438 sprintf(B->message, "BESTMATCH: node %d (0x%p) slot %d diff %d comp %d findslot %d\n",
439 getnodenumber(B, curr), curr, slot, diff, comp, findslot);
440 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
446 /***********************************************************************\
447 | Insert new data into tree |
448 \***********************************************************************/
451 /********************** top level insert call **********************/
452 void insert(Tree *B, keyT key, dataT data)
457 sprintf(B->message, "INSERT: key %s.\n", key.name);
458 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
461 setfunkey(B, key); /* set insertion key */
462 setfundata(B, data); /* a node containing data */
463 setsplitpath(B, NONODE);
464 newNode = descendSplit(B, getroot(B)); /* insertion point search from root */
465 if (newNode != getsplitpath(B)) /* indicates the root node has split */
466 makeNewRoot(B, getroot(B), newNode);
470 /***************** recurse down and split back up ******************/
472 descendSplit(Tree *B, Nptr curr)
474 Nptr downNode = NONODE, sibling = NONODE;
482 setsplitpath(B, NONODE);
483 else if (getsplitpath(B) == NONODE)
484 setsplitpath(B, curr); /* indicates where nodes must split */
486 slot = getSlot(B, curr); /* is null only if the root is empty */
493 else if (slot == BTUPPER)
497 if (isinternal(curr)) { /* continue recursion to leaves */
499 downNode = descendSplit(B, getfirstnode(curr));
501 downNode = descendSplit(B, getnode(curr, slot));
502 } else if ((slot > 0) && !comparekeys(B)(getfunkey(B), getkey(curr, slot), 0)) {
503 if (!(gettreeflags(B) & TREE_FLAG_UNIQUE_KEYS)) {
504 downNode = getDataNode(B, getfunkey(B), getfundata(B));
505 getdatanext(downNode) = getnode(curr,slot);
506 setnode(curr, slot, downNode);
509 setsplitpath(B, NONODE);
512 downNode = getDataNode(B, getfunkey(B), getfundata(B)); /* an insertion takes place */
514 if (downNode != NONODE) { /* insert only where necessary */
515 if (getsplitpath(B) != NONODE)
516 sibling = split(B, curr); /* a sibling node is prepared */
517 insertEntry(B, curr, slot, sibling, downNode);
523 /*************** determine location of inserted key ****************/
525 insertEntry(Tree *B, Nptr currNode, int slot, Nptr sibling, Nptr downPtr)
527 int split, i, j, k, x, y;
531 sprintf(B->message, "INSERT: slot %d, down node %d.\n", slot, getnodenumber(B, downPtr));
532 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
535 if (sibling == NONODE) { /* no split occurred */
536 placeEntry(B, currNode, slot + 1, downPtr);
538 else { /* split entries between the two */
539 if isinternal(currNode) {
541 split = getfanout(B) - getminfanout(B, currNode);
542 } else if (isroot(currNode)) {
543 /* split the root node and turn it into just a leaf */
545 split = getminfanout(B, currNode);
548 split = getminfanout(B, currNode);
550 j = (slot != split ? 1 : 0);
551 k = (slot >= split ? 1 : 0);
554 * Move entries from the top half of the current node to
555 * to the sibling node.
556 * The number of entries to move is dependent upon where
557 * the new entry is going to be added in relationship to
558 * the split slot. (slots are 1-based). If order to produce
559 * a balanced tree, if the insertion slot is greater than
560 * the split we move one less entry as the new entry will
561 * be inserted into the sibling.
563 * If the node that is being split is an internal node (i != 0)
564 * then we move one less entry due to the extra down pointer
565 * when the split slot is not equal to the insertion slot
567 for (x = split + k + j * i, y = 1; x <= getfanout(B); x++, y++) {
568 xferentry(currNode, x, sibling, y); /* copy entries to sibling */
569 clrentry(currNode, x);
570 decentries(currNode);
574 if (getkey(sibling, numentries(sibling)).name == NULL)
578 clrflag(currNode, isFULL);
579 if (numentries(currNode) == getminfanout(B, currNode))
580 setflag(currNode, FEWEST); /* never happens in even size nodes */
583 if (numentries(sibling) > getfanout(B))
586 if (numentries(sibling) == getfanout(B))
587 setflag(sibling, isFULL); /* only ever happens in 2-3+trees */
589 if (numentries(sibling) > getminfanout(B, sibling))
590 clrflag(sibling, FEWEST);
592 if (i) { /* set first pointer of internal node */
594 setfirstnode(sibling, getnode(currNode, split + k));
595 decentries(currNode);
596 if (numentries(currNode) == getminfanout(B, currNode))
597 setflag(currNode, FEWEST);
599 clrflag(currNode, FEWEST);
602 setfirstnode(sibling, downPtr);
605 if (j) { /* insert new entry into correct spot */
607 placeEntry(B, sibling, slot - split + 1 - i, downPtr);
609 placeEntry(B, currNode, slot + 1, downPtr);
611 /* set key separating nodes */
613 key = getkey(sibling, 1);
615 Nptr leaf = getfirstnode(sibling);
616 while ( isinternal(leaf) )
617 leaf = getfirstnode(leaf);
618 key = getkey(leaf, 1);
623 placeEntry(B, sibling, 1, downPtr);
627 /************ place key into appropriate node & slot ***************/
629 placeEntry(Tree *B, Nptr node, int slot, Nptr downPtr)
639 if (numentries(node) != 0 && getkey(node, numentries(node)).name == NULL)
642 for (x = numentries(node); x >= slot && x != 0; x--) /* make room for new entry */
643 pushentry(node, x, 1);
644 setentry(node, slot, getfunkey(B), downPtr);/* place it in correct slot */
646 incentries(node); /* adjust entry counter */
648 if (getkey(node, numentries(node)).name == NULL)
652 if (numentries(node) == getfanout(B))
653 setflag(node, isFULL);
654 if (numentries(node) > getminfanout(B, node))
655 clrflag(node, FEWEST);
657 setflag(node, FEWEST);
661 /***************** split full node and set flags *******************/
663 split(Tree *B, Nptr node)
667 sibling = getFreeNode(B);
669 setflag(sibling, FEWEST); /* set up node flags */
672 setflag(sibling, isLEAF);
673 setnextnode(sibling, getnextnode(node));/* adjust leaf pointers */
674 setnextnode(node, sibling);
676 if (getsplitpath(B) == node)
677 setsplitpath(B, NONODE); /* no more splitting needed */
680 clrflag(node, isROOT);
686 /********************** build new root node ************************/
688 makeNewRoot(Tree *B, Nptr oldRoot, Nptr newNode)
690 setroot(B, getFreeNode(B));
692 setfirstnode(getroot(B), oldRoot); /* old root becomes new root's child */
693 setentry(getroot(B), 1, getfunkey(B), newNode); /* old root's sibling also */
694 incentries(getroot(B));
696 if (numentries(getroot(B)) > getfanout(B))
700 /* the oldRoot's isROOT flag was cleared in split() */
701 setflag(getroot(B), isROOT);
702 setflag(getroot(B), FEWEST);
703 clrflag(getroot(B), isLEAF);
708 /***********************************************************************\
709 | Delete data from tree |
710 \***********************************************************************/
712 /********************** top level delete call **********************\
714 | The recursive call for deletion carries 5 additional parameters
715 | which may be needed to rebalance the B+tree when removing the key.
716 | These parameters are:
717 | 1. immediate left neighbor of the current node
718 | 2. immediate right neighbor of the current node
719 | 3. the anchor of the current node and left neighbor
720 | 4. the anchor of the current node and right neighbor
721 | 5. the parent of the current node
723 | All of these parameters are simple to calculate going along the
724 | recursive path to the leaf nodes and the point of key deletion.
725 | At that time, the algorithm determines which node manipulations
726 | are most efficient, that is, cause the least rearranging of data,
727 | and minimize the need for non-local key manipulation.
729 \***********************************************************************/
730 void delete(Tree *B, keyT key)
735 sprintf(B->message, "DELETE: key %s.\n", key.name);
736 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
739 setfunkey(B, key); /* set deletion key */
740 setmergepath(B, NONODE);
741 newNode = descendBalance(B, getroot(B), NONODE, NONODE, NONODE, NONODE, NONODE);
742 if (isnode(newNode)) {
744 sprintf(B->message, "DELETE: collapsing node %d", getnodenumber(B, newNode));
745 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
747 collapseRoot(B, getroot(B), newNode); /* remove root when superfluous */
752 /********************** remove old root node ***********************/
754 collapseRoot(Tree *B, Nptr oldRoot, Nptr newRoot)
758 sprintf(B->message, "COLLAPSE: old %d, new %d.\n", getnodenumber(B, oldRoot), getnodenumber(B, newRoot));
759 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
760 showNode(B, "collapseRoot oldRoot", oldRoot);
761 showNode(B, "collapseRoot newRoot", newRoot);
765 setflag(newRoot, isROOT);
766 clrflag(newRoot, FEWEST);
767 putFreeNode(B, oldRoot);
768 dectreeheight(B); /* the height of the tree decreases */
772 /**************** recurse down and balance back up *****************/
774 descendBalance(Tree *B, Nptr curr, Nptr left, Nptr right, Nptr lAnc, Nptr rAnc, Nptr parent)
776 Nptr newMe=NONODE, myLeft=NONODE, myRight=NONODE, lAnchor=NONODE, rAnchor=NONODE, newNode=NONODE;
777 int slot = 0, notleft = 0, notright = 0, fewleft = 0, fewright = 0, test = 0;
780 sprintf(B->message, "descendBalance curr %d, left %d, right %d, lAnc %d, rAnc %d, parent %d\n",
781 curr ? getnodenumber(B, curr) : -1,
782 left ? getnodenumber(B, left) : -1,
783 right ? getnodenumber(B, right) : -1,
784 lAnc ? getnodenumber(B, lAnc) : -1,
785 rAnc ? getnodenumber(B, rAnc) : -1,
786 parent ? getnodenumber(B, parent) : -1);
787 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
791 setmergepath(B,NONODE);
792 else if (getmergepath(B) == NONODE)
793 setmergepath(B, curr); /* mark which nodes may need rebalancing */
795 slot = getSlot(B, curr);
802 else if (slot == BTUPPER)
806 if (isinternal(curr)) /* set up next recursion call's parameters */
809 newNode = getfirstnode(curr);
810 myLeft = !isnode(left) ? NONODE : getlastnode(left);
814 newNode = getnode(curr, slot);
816 myLeft = getfirstnode(curr);
818 myLeft = getnode(curr, slot - 1);
822 if (slot == numentries(curr)) {
823 myRight = !isnode(right) ? NONODE : getfirstnode(right);
827 myRight = getnode(curr, slot + 1);
830 newMe = descendBalance(B, newNode, myLeft, myRight, lAnchor, rAnchor, curr);
832 else if ((slot > 0) && !comparekeys(B)(getfunkey(B), getkey(curr, slot), 0))
838 newNode = getnode(curr, slot);
839 next = getdatanext(newNode);
842 * We only delete exact matches.
844 if (!comparekeys(B)(getfunkey(B), getdatakey(newNode), EXACT_MATCH)) {
845 /* exact match, free the first entry */
846 setnode(curr, slot, next);
848 if (next == NONODE) {
849 /* delete this key as there are no more data values */
852 /* otherwise, there are more and we leave the key in place */
853 setnode(curr, slot, next);
854 putFreeNode(B, newNode);
856 /* but do not delete the key */
858 setmergepath(B, NONODE);
860 } else if (next == NONODE) {
862 * we didn't find an exact match and there are no more
863 * choices. so we leave it alone and remove nothing.
866 setmergepath(B, NONODE);
868 /* The first data node doesn't match but there are other
869 * options. So we must determine if any of the next nodes
870 * are the one we are looking for.
875 if (!comparekeys(B)(getfunkey(B), getdatakey(next), EXACT_MATCH)) {
876 /* we found the one to delete */
877 getdatanext(prev) = getdatanext(next);
878 putFreeNode(B, next);
882 next = getdatanext(next);
885 /* do not delete the key */
887 setmergepath(B, NONODE);
892 newMe = NONODE; /* no deletion possible, key not found */
893 setmergepath(B, NONODE);
896 /***************** rebalancing tree after deletion *****************\
898 | The simplest B+tree rebalancing consists of the following rules.
900 | If a node underflows:
901 | CASE 1 check if it is the root, and collapse it if it is,
902 | CASE 2 otherwise, check if both of its neighbors are minimum
903 | sized and merge the underflowing node with one of them,
904 | CASE 3 otherwise shift surplus entries to the underflowing node.
906 | The choice of which neighbor to use is optional. However, the
907 | rebalancing rules that follow also ensure whenever possible
908 | that the merges and shifts which do occur use a neighbor whose
909 | anchor is the parent of the underflowing node.
911 | Cases 3, 4, 5 below are more an optimization than a requirement,
912 | and can be omitted, with a change of the action value in case 6,
913 | which actually corresponds to the third case described above.
915 \***********************************************************************/
917 /* begin deletion, working upwards from leaves */
919 if (newMe != NONODE) { /* this node removal doesn't consider duplicates */
921 sprintf(B->message, "descendBalance DELETE: slot %d, node %d.\n", slot, getnodenumber(B, curr));
922 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
925 removeEntry(B, curr, slot + (newMe != newNode)); /* removes one of two */
928 showNode(B, "descendBalance curr", curr);
932 if (getmergepath(B) == NONODE)
934 else { /* tree rebalancing rules for node merges and shifts */
935 notleft = !isnode(left);
936 notright = !isnode(right);
938 fewleft = isfew(left); /* only used when defined */
940 fewright = isfew(right);
942 /* CASE 1: prepare root node (curr) for removal */
943 if (notleft && notright) {
944 test = isleaf(curr); /* check if B+tree has become empty */
945 newNode = test ? NONODE : getfirstnode(curr);
947 /* CASE 2: the merging of two nodes is a must */
948 else if ((notleft || fewleft) && (notright || fewright)) {
949 test = (lAnc != parent);
950 newNode = test ? merge(B, curr, right, rAnc) : merge(B, left, curr, lAnc);
952 /* CASE 3: choose the better of a merge or a shift */
953 else if (!notleft && fewleft && !notright && !fewright) {
954 test = (rAnc != parent) && (curr == getmergepath(B));
955 newNode = test ? merge(B, left, curr, lAnc) : shift(B, curr, right, rAnc);
957 /* CASE 4: also choose between a merge or a shift */
958 else if (!notleft && !fewleft && !notright && fewright) {
959 test = !(lAnc == parent) && (curr == getmergepath(B));
960 newNode = test ? merge(B, curr, right, rAnc) : shift(B, left, curr, lAnc);
962 /* CASE 5: choose the more effective of two shifts */
963 else if (lAnc == rAnc) { /* => both anchors are the parent */
964 test = (numentries(left) <= numentries(right));
965 newNode = test ? shift(B, curr, right, rAnc) : shift(B, left, curr, lAnc);
967 /* CASE 6: choose the shift with more local effect */
968 else { /* if omitting cases 3,4,5 use below */
969 test = (lAnc == parent); /* test = (!notleft && !fewleft); */
970 newNode = test ? shift(B, left, curr, lAnc) : shift(B, curr, right, rAnc);
975 sprintf(B->message, "descendBalance returns %d\n", getnodenumber(B, newNode));
976 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
982 /**************** remove key and pointer from node *****************/
984 removeEntry(Tree *B, Nptr curr, int slot)
988 putFreeNode(B, getnode(curr, slot)); /* return deleted node to free list */
989 for (x = slot; x < numentries(curr); x++)
990 pullentry(curr, x, 1); /* adjust node with removed key */
992 clrflag(curr, isFULL); /* keep flag information up to date */
993 if (numentries(curr) > getminfanout(B, curr))
994 clrflag(curr, FEWEST);
996 setflag(curr, FEWEST);
1000 /******* merge a node pair & set emptied node up for removal *******/
1002 merge(Tree *B, Nptr left, Nptr right, Nptr anchor)
1007 sprintf(B->message, "MERGE: left %d, right %d.\n", getnodenumber(B, left), getnodenumber(B, right));
1008 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1009 showNode(B, "pre-merge anchor", anchor);
1010 showNode(B, "pre-merge left", left);
1011 showNode(B, "pre-merge right", right);
1014 if (isinternal(left)) {
1015 incentries(left); /* copy key separating the nodes */
1017 if (numentries(left) > getfanout(B))
1020 setfunkey(B, getkey(right, 1)); /* defined but maybe just deleted */
1021 z = getSlot(B, anchor); /* needs the just calculated key */
1024 setfunkey(B, getkey(anchor, z)); /* set slot to delete in anchor */
1025 setentry(left, numentries(left), getfunkey(B), getfirstnode(right));
1028 setnextnode(left, getnextnode(right));
1030 for (x = numentries(left) + 1, y = 1; y <= numentries(right); x++, y++) {
1033 if (numentries(left) > getfanout(B))
1036 xferentry(right, y, left, x); /* transfer entries to left node */
1038 clearentries(right);
1040 if (numentries(left) > getminfanout(B, left))
1041 clrflag(left, FEWEST);
1042 if (numentries(left) == getfanout(B))
1043 setflag(left, isFULL); /* never happens in even size nodes */
1045 if (getmergepath(B) == left || getmergepath(B) == right)
1046 setmergepath(B, NONODE); /* indicate rebalancing is complete */
1049 showNode(B, "post-merge anchor", anchor);
1050 showNode(B, "post-merge left", left);
1051 showNode(B, "post-merge right", right);
1057 /****** shift entries in a node pair & adjust anchor key value *****/
1059 shift(Tree *B, Nptr left, Nptr right, Nptr anchor)
1064 sprintf(B->message, "SHIFT: left %d, right %d, anchor %d.\n",
1065 getnodenumber(B, left),
1066 getnodenumber(B, right),
1067 getnodenumber(B, anchor));
1068 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1069 showNode(B, "pre-shift anchor", anchor);
1070 showNode(B, "pre-shift left", left);
1071 showNode(B, "pre-shift right", right);
1074 i = isinternal(left);
1076 if (numentries(left) < numentries(right)) { /* shift entries to left */
1077 y = (numentries(right) - numentries(left)) >> 1;
1078 x = numentries(left) + y;
1079 setfunkey(B, getkey(right, y + 1 - i)); /* set new anchor key value */
1080 z = getSlot(B, anchor); /* find slot in anchor node */
1084 if (z == 0 && !isroot(anchor))
1087 if (i) { /* move out old anchor value */
1088 decentries(right); /* adjust for shifting anchor */
1091 if (numentries(left) > getfanout(B))
1094 setentry(left, numentries(left), getkey(anchor, z), getfirstnode(right));
1095 setfirstnode(right, getnode(right, y + 1 - i));
1097 clrflag(right, isFULL);
1098 setkey(anchor, z, getfunkey(B)); /* set new anchor value */
1099 for (z = y, y -= i; y > 0; y--, x--) {
1100 decentries(right); /* adjust entry count */
1103 if (numentries(left) > getfanout(B))
1106 xferentry(right, y, left, x); /* transfer entries over */
1109 for (x = 1; x <= numentries(right); x++) /* adjust reduced node */
1110 pullentry(right, x, z);
1112 else if (numentries(left) > numentries(right)) { /* shift entries to right */
1113 y = (numentries(left) - numentries(right)) >> 1;
1114 x = numentries(left) - y + 1;
1116 for (z = numentries(right); z > 0; z--) /* adjust increased node */
1117 pushentry(right, z, y);
1119 setfunkey(B, getkey(left, x)); /* set new anchor key value */
1120 z = getSlot(B, anchor);
1129 if (numentries(right) > getfanout(B))
1132 setentry(right, y, getkey(anchor, z), getfirstnode(right));
1133 setfirstnode(right, getnode(left, x));
1135 clrflag(left, isFULL);
1136 setkey(anchor, z, getfunkey(B));
1137 for (x = numentries(left) + i, y -= i; y > 0; y--, x--) {
1141 if (numentries(right) > getfanout(B))
1144 xferentry(left, x, right, y); /* transfer entries over */
1152 #endif /* DEBUG_BTREE */
1154 if (numentries(left) > getminfanout(B, left)) /* adjust node flags */
1155 clrflag(left, FEWEST); /* never happens in 2-3+trees */
1157 setflag(left, FEWEST);
1158 if (numentries(right) > getminfanout(B, right))
1159 clrflag(right, FEWEST); /* never happens in 2-3+trees */
1161 setflag(right, FEWEST);
1162 setmergepath(B, NONODE);
1165 sprintf(B->message, "SHIFT: left %d, right %d.\n", getnodenumber(B, left), getnodenumber(B, right));
1166 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1167 showNode(B, "post-shift anchor", anchor);
1168 showNode(B, "post-shift left", left);
1169 showNode(B, "post-shift right", right);
1177 _clrentry(Nptr node, int entry)
1179 if (getkey(node,entry).name != NULL) {
1180 free(getkey(node,entry).name);
1181 getkey(node,entry).name = NULL;
1183 getnode(node,entry) = NONODE;
1187 _pushentry(Nptr node, int entry, int offset)
1189 if (getkey(node,entry + offset).name != NULL)
1190 free(getkey(node,entry + offset).name);
1195 getkey(node,entry + offset).name = strdup(getkey(node,entry).name);
1197 if ( getnode(node, entry) == NONODE )
1200 getnode(node,entry + offset) = getnode(node,entry);
1204 _pullentry(Nptr node, int entry, int offset)
1206 if (getkey(node,entry).name != NULL)
1207 free(getkey(node,entry).name);
1208 getkey(node,entry).name = strdup(getkey(node,entry + offset).name);
1210 if ( getnode(node, entry + offset) == NONODE )
1213 getnode(node,entry) = getnode(node,entry + offset);
1217 _xferentry(Nptr srcNode, int srcEntry, Nptr destNode, int destEntry)
1219 if (getkey(destNode,destEntry).name != NULL)
1220 free(getkey(destNode,destEntry).name);
1221 getkey(destNode,destEntry).name = strdup(getkey(srcNode,srcEntry).name);
1223 if ( getnode(srcNode, srcEntry) == NONODE )
1226 getnode(destNode,destEntry) = getnode(srcNode,srcEntry);
1230 _setentry(Nptr node, int entry, keyT key, Nptr downNode)
1232 if (getkey(node,entry).name != NULL)
1233 free(getkey(node,entry).name);
1234 getkey(node,entry).name = strdup(key.name);
1236 if ( downNode == NONODE )
1239 getnode(node,entry) = downNode;
1243 /***********************************************************************\
1244 | Empty Node Utilities |
1245 \***********************************************************************/
1247 /********************* Set up initial pool of free nodes ***********/
1249 initFreeNodePool(Tree *B, int quantity)
1254 setfirstallnode(B, NONODE);
1255 setfirstfreenode(B, NONODE);
1257 for (i = 0, p = NONODE; i < quantity; i++) {
1258 n = malloc(sizeof(*n));
1259 memset(n, 0, sizeof(*n));
1260 setnodenumber(B,n,i);
1263 setnextnode(p, n); /* insert node into free node list */
1266 setfirstfreenode(B, n);
1267 setfirstallnode(B, n);
1271 setnextnode(p, NONODE); /* indicates end of free node list */
1272 setallnode(p, NONODE); /* indicates end of all node list */
1274 setpoolsize(B, quantity);
1278 /******************* Cleanup Free Node Pool **************************/
1281 cleanupNodePool(Tree *B)
1286 for ( i=0, node = getfirstallnode(B); node != NONODE && i<getpoolsize(B); node = next, i++ ) {
1288 if ( getdatakey(node).name ) {
1289 free(getdatakey(node).name);
1290 getdatakey(node).name = NULL;
1292 if ( getdatavalue(node).longname ) {
1293 free(getdatavalue(node).longname);
1294 getdatavalue(node).longname = NULL;
1296 } else { /* data node */
1297 for ( j=1; j<=getfanout(B); j++ ) {
1298 if (getkey(node, j).name)
1299 free(getkey(node, j).name);
1302 next = getallnode(node);
1307 /************** take a free B+tree node from the pool **************/
1309 getFreeNode(Tree *B)
1311 Nptr newNode = getfirstfreenode(B);
1313 if (newNode != NONODE) {
1314 setfirstfreenode(B, getnextnode(newNode)); /* adjust free node list */
1315 setnextnode(newNode, NONODE); /* remove node from list */
1318 newNode = malloc(sizeof(*newNode));
1319 memset(newNode, 0, sizeof(*newNode));
1321 setallnode(newNode, getfirstallnode(B));
1322 setfirstallnode(B, newNode);
1323 setnodenumber(B, newNode, getpoolsize(B));
1324 setpoolsize(B, getpoolsize(B) + 1);
1327 clearflags(newNode); /* Sets MAGIC */
1332 /************* return a deleted B+tree node to the pool ************/
1334 putFreeNode(Tree *B, Nptr node)
1342 if ( getdatakey(node).name )
1343 free(getdatakey(node).name);
1344 } else { /* data node */
1345 for ( i=1; i<=getfanout(B); i++ ) {
1346 if (getkey(node, i).name)
1347 free(getkey(node, i).name);
1351 /* free nodes do not have MAGIC set */
1352 memset(&nAdr(node), 0, sizeof(nAdr(node)));
1353 setnextnode(node, getfirstfreenode(B)); /* add node to list */
1354 setfirstfreenode(B, node); /* set it to be list head */
1358 /******* fill a free data node with a key and associated data ******/
1360 getDataNode(Tree *B, keyT key, dataT data)
1362 Nptr newNode = getFreeNode(B);
1364 setflag(newNode, isDATA);
1365 getdatakey(newNode).name = strdup(key.name);
1366 getdatavalue(newNode) = data;
1367 getdatanext(newNode) = NONODE;
1374 /***********************************************************************\
1375 | B+tree Printing Utilities |
1376 \***********************************************************************/
1378 /*********************** B+tree node printer ***********************/
1379 void showNode(Tree *B, const char * where, Nptr n)
1383 sprintf(B->message, "- -- -- -- -- -- -- -- -- -- -- -- -\n");
1384 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1385 sprintf(B->message, "| %-20s |\n", where);
1386 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1387 sprintf(B->message, "| node %6d ", getnodenumber(B, n));
1388 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1389 sprintf(B->message, " magic %4x |\n", getmagic(n));
1390 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1391 sprintf(B->message, "- -- -- -- -- -- -- -- -- -- -- -- -\n");
1392 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1393 sprintf(B->message, "| flags %1d%1d%1d%1d ", isfew(n), isfull(n), isroot(n), isleaf(n));
1394 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1395 sprintf(B->message, "| keys = %5d ", numentries(n));
1396 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1397 sprintf(B->message, "| node = %6d |\n", getnodenumber(B, getfirstnode(n)));
1398 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1399 for (x = 1; x <= numentries(n); x++) {
1400 sprintf(B->message, "| entry %6d ", x);
1401 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1402 sprintf(B->message, "| key = %6s ", getkey(n, x).name);
1403 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1404 sprintf(B->message, "| node = %6d |\n", getnodenumber(B, getnode(n, x)));
1405 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1407 sprintf(B->message, "- -- -- -- -- -- -- -- -- -- -- -- -\n");
1408 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1411 /****************** B+tree class variable printer ******************/
1412 void showBtree(Tree *B)
1414 sprintf(B->message, "- -- -- -- -- -- -\n");
1415 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1416 sprintf(B->message, "| B+tree %10p |\n", (void *) B);
1417 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1418 sprintf(B->message, "- -- -- -- -- -- -\n");
1419 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1420 sprintf(B->message, "| root %6d |\n", getnodenumber(B, getroot(B)));
1421 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1422 sprintf(B->message, "| leaf %6d |\n", getnodenumber(B, getleaf(B)));
1423 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1424 sprintf(B->message, "| fanout %3d |\n", getfanout(B) + 1);
1425 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1426 sprintf(B->message, "| minfanout %3d |\n", getminfanout(B, getroot(B)) + 1);
1427 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1428 sprintf(B->message, "| height %3d |\n", gettreeheight(B));
1429 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1430 sprintf(B->message, "| freenode %6d |\n", getnodenumber(B, getfirstfreenode(B)));
1431 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1432 sprintf(B->message, "| theKey %6s |\n", getfunkey(B).name);
1433 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1434 sprintf(B->message, "| theData %d.%d.%d |\n", getfundata(B).volume,
1435 getfundata(B).vnode, getfundata(B).unique);
1436 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1437 sprintf(B->message, "- -- -- -- -- -- -\n");
1438 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1442 listBtreeNodes(Tree *B, const char * parent_desc, Nptr node)
1448 if (isntnode(node)) {
1449 sprintf(B->message, "%s - NoNode!!!\n");
1450 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1456 data = getdatavalue(node);
1457 sprintf(B->message, "%s - data node %d (%d.%d.%d)\n",
1458 parent_desc, getnodenumber(B, node),
1459 data.fid.volume, data.fid.vnode, data.fid.unique);
1460 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1463 showNode(B, parent_desc, node);
1465 if ( isinternal(node) || isroot(node) ) {
1466 sprintf(thisnode, "parent %6d", getnodenumber(B , node));
1468 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1469 for ( i= isinternal(node) ? 0 : 1; i <= numentries(node); i++ ) {
1470 listBtreeNodes(B, thisnode, i == 0 ? getfirstnode(node) : getnode(node, i));
1475 /*********************** B+tree data printer ***********************/
1477 listBtreeValues(Tree *B, Nptr n, int num)
1483 for (slot = 1; (n != NONODE) && num && numentries(n); num--) {
1484 if (comparekeys(B)(getkey(n, slot),prev, 0) < 0) {
1485 sprintf(B->message, "BOMB %8s\n", getkey(n, slot).name);
1486 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1489 prev = getkey(n, slot);
1490 data = getdatavalue(getnode(n, slot));
1491 sprintf(B->message, "%8s (%d.%d.%d)\n",
1492 prev.name, data.fid.volume, data.fid.vnode, data.fid.unique);
1493 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1494 if (++slot > numentries(n))
1495 n = getnextnode(n), slot = 1;
1497 sprintf(B->message, "\n\n");
1498 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1501 /******************** entire B+tree data printer *******************/
1503 listAllBtreeValues(Tree *B)
1505 listBtreeValues(B, getleaf(B), BTERROR);
1507 #endif /* DEBUG_BTREE */
1510 findAllBtreeValues(Tree *B)
1513 Nptr n = getleaf(B), l;
1517 for (slot = 1; (n != NONODE) && num && numentries(n); num--) {
1518 if (comparekeys(B)(getkey(n, slot),prev, 0) < 0) {
1519 sprintf(B->message,"BOMB %8s\n", getkey(n, slot).name);
1520 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1525 prev = getkey(n, slot);
1526 l = bplus_Lookup(B, prev);
1529 sprintf(B->message,"BOMB %8s cannot be found\n", prev.name);
1531 sprintf(B->message,"BOMB lookup(%8s) finds wrong node\n", prev.name);
1532 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1538 if (++slot > numentries(n))
1539 n = getnextnode(n), slot = 1;
1544 * the return must be -1, 0, or 1. stricmp() in MSVC 8.0
1545 * does not return only those values.
1547 * the sorting of the tree is by case insensitive sort order
1548 * therefore, unless the strings actually match via a case
1549 * insensitive search do we want to perform the case sensitive
1550 * match. Otherwise, the search order might be considered
1551 * to be inconsistent when the EXACT_MATCH flag is set.
1554 compareKeys(keyT key1, keyT key2, int flags)
1558 comp = stricmp(key1.name, key2.name);
1559 if (comp == 0 && (flags & EXACT_MATCH))
1560 comp = strcmp(key1.name, key2.name);
1561 return (comp < 0 ? -1 : (comp > 0 ? 1 : 0));
1564 /* Look up a file name in directory.
1567 op->scp->dirlock is read locked
1570 op->scp->dirlock is read locked
1573 cm_BPlusDirLookup(cm_dirOp_t * op, char *entry, cm_fid_t * cfid)
1577 Nptr leafNode = NONODE;
1578 LARGE_INTEGER start, end;
1580 if (op->scp->dirBplus == NULL ||
1581 op->dataVersion != op->scp->dirDataVersion) {
1586 lock_AssertAny(&op->scp->dirlock);
1588 QueryPerformanceCounter(&start);
1590 leafNode = bplus_Lookup(op->scp->dirBplus, key);
1591 if (leafNode != NONODE) {
1593 Nptr firstDataNode, dataNode, nextDataNode;
1597 /* Found a leaf that matches the key via a case-insensitive
1598 * match. There may be one or more data nodes that match.
1599 * If we have an exact match, return that.
1600 * If we have an ambiguous match, return an error.
1601 * If we have only one inexact match, return that.
1603 slot = getSlot(op->scp->dirBplus, leafNode);
1604 if (slot <= BTERROR) {
1605 op->scp->dirDataVersion = 0;
1606 rc = (slot == BTERROR ? EINVAL : ENOENT);
1609 firstDataNode = getnode(leafNode, slot);
1611 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
1613 if (!comparekeys(op->scp->dirBplus)(key, getdatakey(dataNode), EXACT_MATCH) ) {
1617 nextDataNode = getdatanext(dataNode);
1621 *cfid = getdatavalue(dataNode).fid;
1623 bplus_lookup_hits++;
1624 } else if (count == 1) {
1625 *cfid = getdatavalue(firstDataNode).fid;
1626 rc = CM_ERROR_INEXACT_MATCH;
1627 bplus_lookup_hits_inexact++;
1629 rc = CM_ERROR_AMBIGUOUS_FILENAME;
1630 bplus_lookup_ambiguous++;
1634 bplus_lookup_misses++;
1637 QueryPerformanceCounter(&end);
1639 bplus_lookup_time += (end.QuadPart - start.QuadPart);
1648 op->scp->dirlock is write locked
1651 op->scp->dirlock is write locked
1653 long cm_BPlusDirCreateEntry(cm_dirOp_t * op, char *entry, cm_fid_t * cfid)
1658 LARGE_INTEGER start, end;
1661 if (op->scp->dirBplus == NULL ||
1662 op->dataVersion != op->scp->dirDataVersion) {
1668 lock_AssertWrite(&op->scp->dirlock);
1670 cm_SetFid(&data.fid, cfid->cell, cfid->volume, cfid->vnode, cfid->unique);
1671 data.longname = NULL;
1673 QueryPerformanceCounter(&start);
1674 bplus_create_entry++;
1676 insert(op->scp->dirBplus, key, data);
1677 if (!cm_Is8Dot3(entry)) {
1679 dfid.vnode = htonl(data.fid.vnode);
1680 dfid.unique = htonl(data.fid.unique);
1682 cm_Gen8Dot3NameInt(entry, &dfid, shortName, NULL);
1684 key.name = shortName;
1685 data.longname = strdup(entry);
1686 insert(op->scp->dirBplus, key, data);
1689 QueryPerformanceCounter(&end);
1691 bplus_create_time += (end.QuadPart - start.QuadPart);
1700 op->scp->dirlock is write locked
1703 op->scp->dirlock is write locked
1705 int cm_BPlusDirDeleteEntry(cm_dirOp_t * op, char *entry)
1709 Nptr leafNode = NONODE;
1710 LARGE_INTEGER start, end;
1712 if (op->scp->dirBplus == NULL ||
1713 op->dataVersion != op->scp->dirDataVersion) {
1718 lock_AssertWrite(&op->scp->dirlock);
1720 QueryPerformanceCounter(&start);
1722 bplus_remove_entry++;
1724 if (op->scp->dirBplus) {
1725 if (!cm_Is8Dot3(entry)) {
1730 leafNode = bplus_Lookup(op->scp->dirBplus, key);
1731 if (leafNode != NONODE) {
1733 Nptr firstDataNode, dataNode, nextDataNode;
1737 /* Found a leaf that matches the key via a case-insensitive
1738 * match. There may be one or more data nodes that match.
1739 * If we have an exact match, return that.
1740 * If we have an ambiguous match, return an error.
1741 * If we have only one inexact match, return that.
1743 slot = getSlot(op->scp->dirBplus, leafNode);
1744 if (slot <= BTERROR) {
1745 op->scp->dirDataVersion = 0;
1749 firstDataNode = getnode(leafNode, slot);
1751 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
1753 if (!comparekeys(op->scp->dirBplus)(key, getdatakey(dataNode), EXACT_MATCH) ) {
1757 nextDataNode = getdatanext(dataNode);
1761 fid = getdatavalue(dataNode).fid;
1763 } else if (count == 1) {
1764 fid = getdatavalue(firstDataNode).fid;
1765 rc = CM_ERROR_INEXACT_MATCH;
1767 rc = CM_ERROR_AMBIGUOUS_FILENAME;
1772 if (rc != CM_ERROR_AMBIGUOUS_FILENAME) {
1773 dfid.vnode = htonl(fid.vnode);
1774 dfid.unique = htonl(fid.unique);
1775 cm_Gen8Dot3NameInt(entry, &dfid, shortName, NULL);
1777 /* delete first the long name and then the short name */
1778 delete(op->scp->dirBplus, key);
1779 key.name = shortName;
1780 delete(op->scp->dirBplus, key);
1783 char * longname = NULL;
1784 /* We need to lookup the 8dot3 name to determine what the
1785 * matching long name is
1787 leafNode = bplus_Lookup(op->scp->dirBplus, key);
1788 if (leafNode != NONODE) {
1790 Nptr firstDataNode, dataNode, nextDataNode;
1794 /* Found a leaf that matches the key via a case-insensitive
1795 * match. There may be one or more data nodes that match.
1796 * If we have an exact match, return that.
1797 * If we have an ambiguous match, return an error.
1798 * If we have only one inexact match, return that.
1800 slot = getSlot(op->scp->dirBplus, leafNode);
1801 if (slot <= BTERROR) {
1802 op->scp->dirDataVersion = 0;
1807 firstDataNode = getnode(leafNode, slot);
1809 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
1811 if (!comparekeys(op->scp->dirBplus)(key, getdatakey(dataNode), EXACT_MATCH) ) {
1815 nextDataNode = getdatanext(dataNode);
1819 longname = getdatavalue(dataNode).longname;
1821 } else if (count == 1) {
1822 longname = getdatavalue(firstDataNode).longname;
1823 rc = CM_ERROR_INEXACT_MATCH;
1825 rc = CM_ERROR_AMBIGUOUS_FILENAME;
1829 if (rc != CM_ERROR_AMBIGUOUS_FILENAME) {
1831 key.name = longname;
1832 delete(op->scp->dirBplus, key);
1835 delete(op->scp->dirBplus, key);
1840 QueryPerformanceCounter(&end);
1842 bplus_remove_time += (end.QuadPart - start.QuadPart);
1850 int cm_BPlusDirFoo(struct cm_scache *scp, struct cm_dirEntry *dep,
1851 void *dummy, osi_hyper_t *entryOffsetp)
1853 keyT key = {dep->name};
1856 long normalized_len;
1857 char *normalized_name=NULL;
1858 cm_SetFid(&data.fid, scp->fid.cell, scp->fid.volume, ntohl(dep->fid.vnode), ntohl(dep->fid.unique));
1859 data.longname = NULL;
1861 normalized_len = cm_NormalizeUtf8String(dep->name, -1, NULL, 0);
1863 normalized_name = malloc(normalized_len);
1864 if (normalized_name) {
1865 cm_NormalizeUtf8String(dep->name, -1, normalized_name, normalized_len);
1866 key.name = normalized_name;
1868 key.name = dep->name;
1871 /* the Write lock is held in cm_BPlusDirBuildTree() */
1872 insert(scp->dirBplus, key, data);
1873 if (!cm_Is8Dot3(dep->name)) {
1875 dfid.vnode = dep->fid.vnode;
1876 dfid.unique = dep->fid.unique;
1878 cm_Gen8Dot3NameInt(dep->name, &dfid, shortName, NULL);
1880 key.name = shortName;
1881 data.longname = strdup(dep->name);
1882 insert(scp->dirBplus, key, data);
1885 if (normalized_name)
1886 free(normalized_name);
1889 findAllBtreeValues(scp->dirBplus);
1896 * scp->dirlock must be writeLocked before call
1898 * scp->mutex must not be held
1900 long cm_BPlusDirBuildTree(cm_scache_t *scp, cm_user_t *userp, cm_req_t* reqp)
1904 LARGE_INTEGER start, end;
1906 osi_assertx(scp->dirBplus == NULL, "cm_BPlusDirBuildTree called on non-empty tree");
1908 lock_AssertWrite(&scp->dirlock);
1910 QueryPerformanceCounter(&start);
1913 if (scp->dirBplus == NULL) {
1914 scp->dirBplus = initBtree(64, MAX_FANOUT, compareKeys);
1916 if (scp->dirBplus == NULL) {
1920 thyper.HighPart = 0;
1921 rc = cm_ApplyDir(scp, cm_BPlusDirFoo, NULL, &thyper, userp, reqp, NULL);
1924 QueryPerformanceCounter(&end);
1926 bplus_build_time += (end.QuadPart - start.QuadPart);
1929 cm_BPlusDirEnumTest(scp, 1);
1934 int cm_MemDumpBPlusStats(FILE *outputFile, char *cookie, int lock)
1939 sprintf(output, "%s - B+ Lookup Hits: %-8d\r\n", cookie, bplus_lookup_hits);
1940 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1941 sprintf(output, "%s - Inexact Hits: %-8d\r\n", cookie, bplus_lookup_hits_inexact);
1942 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1943 sprintf(output, "%s - Ambiguous Hits: %-8d\r\n", cookie, bplus_lookup_ambiguous);
1944 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1945 sprintf(output, "%s - Misses: %-8d\r\n", cookie, bplus_lookup_misses);
1946 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1947 sprintf(output, "%s - Create: %-8d\r\n", cookie, bplus_create_entry);
1948 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1949 sprintf(output, "%s - Remove: %-8d\r\n", cookie, bplus_remove_entry);
1950 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1951 sprintf(output, "%s - Build Tree: %-8d\r\n", cookie, bplus_build_tree);
1952 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1953 sprintf(output, "%s - Free Tree: %-8d\r\n", cookie, bplus_free_tree);
1954 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1955 sprintf(output, "%s - DV Error: %-8d\r\n", cookie, bplus_dv_error);
1956 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1958 sprintf(output, "%s - B+ Time Lookup: %-16I64d\r\n", cookie, bplus_lookup_time);
1959 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1960 sprintf(output, "%s - Create: %-16I64d\r\n", cookie, bplus_create_time);
1961 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1962 sprintf(output, "%s - Remove: %-16I64d\r\n", cookie, bplus_remove_time);
1963 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1964 sprintf(output, "%s - Build: %-16I64d\r\n", cookie, bplus_build_time);
1965 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1966 sprintf(output, "%s - Free: %-16I64d\r\n", cookie, bplus_free_time);
1967 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1972 void cm_BPlusDumpStats(void)
1974 afsi_log("B+ Lookup Hits: %-8d", bplus_lookup_hits);
1975 afsi_log(" Inexact Hits: %-8d", bplus_lookup_hits_inexact);
1976 afsi_log(" Ambiguous Hits: %-8d", bplus_lookup_ambiguous);
1977 afsi_log(" Misses: %-8d", bplus_lookup_misses);
1978 afsi_log(" Create: %-8d", bplus_create_entry);
1979 afsi_log(" Remove: %-8d", bplus_remove_entry);
1980 afsi_log(" Build Tree: %-8d", bplus_build_tree);
1981 afsi_log(" Free Tree: %-8d", bplus_free_tree);
1982 afsi_log(" DV Error: %-8d", bplus_dv_error);
1984 afsi_log("B+ Time Lookup: %-16I64d", bplus_lookup_time);
1985 afsi_log(" Create: %-16I64d", bplus_create_time);
1986 afsi_log(" Remove: %-16I64d", bplus_remove_time);
1987 afsi_log(" Build: %-16I64d", bplus_build_time);
1988 afsi_log(" Free: %-16I64d", bplus_free_time);
1991 static cm_direnum_t *
1992 cm_BPlusEnumAlloc(afs_uint32 entries)
1994 cm_direnum_t * enump;
2000 size = sizeof(cm_direnum_t)+(entries-1)*sizeof(cm_direnum_entry_t);
2001 enump = (cm_direnum_t *)malloc(size);
2002 memset(enump, 0, size);
2003 enump->count = entries;
2008 cm_BPlusDirEnumerate(cm_scache_t *scp, afs_uint32 locked,
2009 char * maskp, cm_direnum_t **enumpp)
2011 afs_uint32 count = 0, slot, numentries;
2012 Nptr leafNode = NONODE, nextLeafNode;
2013 Nptr firstDataNode, dataNode, nextDataNode;
2014 cm_direnum_t * enump;
2018 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate start");
2020 /* Read lock the bplus tree so the data can't change */
2022 lock_ObtainRead(&scp->dirlock);
2024 if (scp->dirBplus == NULL) {
2025 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate No BPlus Tree");
2029 /* Compute the number of entries */
2030 for (count = 0, leafNode = getleaf(scp->dirBplus); leafNode; leafNode = nextLeafNode) {
2032 for ( slot = 1, numentries = numentries(leafNode); slot <= numentries; slot++) {
2033 firstDataNode = getnode(leafNode, slot);
2035 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
2036 if (maskp == NULL) {
2037 /* name is in getdatakey(dataNode) */
2038 if (getdatavalue(dataNode).longname != NULL ||
2039 cm_Is8Dot3(getdatakey(dataNode).name))
2042 if (cm_Is8Dot3(getdatakey(dataNode).name) &&
2043 smb_V3MatchMask(getdatakey(dataNode).name, maskp, CM_FLAG_CASEFOLD) ||
2044 getdatavalue(dataNode).longname == NULL &&
2045 smb_V3MatchMask(getdatavalue(dataNode).longname, maskp, CM_FLAG_CASEFOLD))
2048 nextDataNode = getdatanext(dataNode);
2052 nextLeafNode = getnextnode(leafNode);
2055 sprintf(buffer, "BPlusTreeEnumerate count = %d", count);
2056 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, buffer));
2058 /* Allocate the enumeration object */
2059 enump = cm_BPlusEnumAlloc(count);
2060 if (enump == NULL) {
2061 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate Alloc failed");
2066 /* Copy the name and fid for each longname entry into the enumeration */
2067 for (count = 0, leafNode = getleaf(scp->dirBplus); leafNode; leafNode = nextLeafNode) {
2069 for ( slot = 1, numentries = numentries(leafNode); slot <= numentries; slot++) {
2070 firstDataNode = getnode(leafNode, slot);
2072 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
2077 if (maskp == NULL) {
2078 if (getdatavalue(dataNode).longname != NULL ||
2079 cm_Is8Dot3(getdatakey(dataNode).name))
2084 if (cm_Is8Dot3(getdatakey(dataNode).name) &&
2085 smb_V3MatchMask(getdatakey(dataNode).name, maskp, CM_FLAG_CASEFOLD) ||
2086 getdatavalue(dataNode).longname == NULL &&
2087 smb_V3MatchMask(getdatavalue(dataNode).longname, maskp, CM_FLAG_CASEFOLD))
2094 if (getdatavalue(dataNode).longname) {
2095 name = strdup(getdatavalue(dataNode).longname);
2098 name = strdup(getdatakey(dataNode).name);
2103 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate strdup failed");
2107 enump->entry[count].name = name;
2108 enump->entry[count].fid = getdatavalue(dataNode).fid;
2110 strncpy(enump->entry[count].shortName, getdatakey(dataNode).name,
2111 sizeof(enump->entry[count].shortName));
2113 enump->entry[count].shortName[0] = '\0';
2116 nextDataNode = getdatanext(dataNode);
2120 nextLeafNode = getnextnode(leafNode);
2125 lock_ReleaseRead(&scp->dirlock);
2127 /* if we failed, cleanup any mess */
2129 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate rc != 0");
2131 for ( count = 0; count < enump->count && enump->entry[count].name; count++ ) {
2132 free(enump->entry[count].name);
2139 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate end");
2145 cm_BPlusDirNextEnumEntry(cm_direnum_t *enump, cm_direnum_entry_t **entrypp)
2147 if (enump == NULL || entrypp == NULL || enump->next > enump->count) {
2150 osi_Log0(afsd_logp, "cm_BPlusDirNextEnumEntry invalid input");
2151 return CM_ERROR_INVAL; \
2154 *entrypp = &enump->entry[enump->next++];
2155 if ( enump->next == enump->count ) {
2156 osi_Log0(afsd_logp, "cm_BPlusDirNextEnumEntry STOPNOW");
2157 return CM_ERROR_STOPNOW;
2160 osi_Log0(afsd_logp, "cm_BPlusDirNextEnumEntry SUCCESS");
2166 cm_BPlusDirFreeEnumeration(cm_direnum_t *enump)
2170 osi_Log0(afsd_logp, "cm_BPlusDirFreeEnumeration");
2173 for ( count = 0; count < enump->count && enump->entry[count].name; count++ ) {
2174 free(enump->entry[count].name);
2182 cm_BPlusDirEnumTest(cm_scache_t * dscp, afs_uint32 locked)
2184 cm_direnum_t * enump = NULL;
2185 cm_direnum_entry_t * entryp;
2188 osi_Log0(afsd_logp, "cm_BPlusDirEnumTest start");
2190 for (code = cm_BPlusDirEnumerate(dscp, locked, NULL, &enump); code == 0; ) {
2191 code = cm_BPlusDirNextEnumEntry(enump, &entryp);
2192 if (code == 0 || code == CM_ERROR_STOPNOW) {
2195 char * type = "ScpNotFound";
2198 scp = cm_FindSCache(&entryp->fid);
2200 switch (scp->fileType) {
2201 case CM_SCACHETYPE_FILE :
2204 case CM_SCACHETYPE_DIRECTORY :
2207 case CM_SCACHETYPE_SYMLINK :
2210 case CM_SCACHETYPE_MOUNTPOINT:
2211 type = "MountPoint";
2213 case CM_SCACHETYPE_DFSLINK :
2216 case CM_SCACHETYPE_INVALID :
2224 dv = scp->dataVersion;
2225 cm_ReleaseSCache(scp);
2228 sprintf(buffer, "'%s' Fid = (%d,%d,%d,%d) Short = '%s' Type %s DV %I64d",
2230 entryp->fid.cell, entryp->fid.volume, entryp->fid.vnode, entryp->fid.unique,
2235 osi_Log0(afsd_logp, osi_LogSaveString(afsd_logp, buffer));
2240 cm_BPlusDirFreeEnumeration(enump);
2242 osi_Log0(afsd_logp, "cm_BPlusDirEnumTest end");
2246 #endif /* USE_BPLUS */