2 * Copyright 2007-2008 Secure Endpoints Inc.
6 * This software has been released under the terms of the IBM Public
7 * License. For details, see the LICENSE file in the top-level source
8 * directory or online at http://www.openafs.org/dl/license10.html
10 * Thanks to Jan Jannink for B+ tree algorithms.
13 #include <afsconfig.h>
14 #include <afs/param.h>
27 /******************* statistics globals *********************************/
28 afs_uint32 bplus_lookup_hits = 0;
29 afs_uint32 bplus_lookup_hits_inexact = 0;
30 afs_uint32 bplus_lookup_misses = 0;
31 afs_uint32 bplus_lookup_ambiguous = 0;
32 afs_uint32 bplus_create_entry = 0;
33 afs_uint32 bplus_remove_entry = 0;
34 afs_uint32 bplus_build_tree = 0;
35 afs_uint32 bplus_free_tree = 0;
36 afs_uint32 bplus_dv_error = 0;
38 afs_uint64 bplus_lookup_time = 0;
39 afs_uint64 bplus_create_time = 0;
40 afs_uint64 bplus_remove_time = 0;
41 afs_uint64 bplus_build_time = 0;
42 afs_uint64 bplus_free_time = 0;
44 /*********************** private functions *************************/
45 static void initFreeNodePool(Tree *B, int quantity);
46 static Nptr getFreeNode(Tree *B);
47 static void putFreeNode(Tree *B, Nptr self);
48 static void cleanupNodePool(Tree *B);
50 static Nptr descendToLeaf(Tree *B, Nptr curr);
51 int getSlot(Tree *B, Nptr curr);
52 static int findKey(Tree *B, Nptr curr, int lo, int hi);
53 static int bestMatch(Tree *B, Nptr curr, int slot);
55 static Nptr getDataNode(Tree *B, keyT key, dataT data);
56 static Nptr descendSplit(Tree *B, Nptr curr);
57 static void insertEntry(Tree *B, Nptr node, int slot, Nptr sibling, Nptr downPtr);
58 static void placeEntry(Tree *B, Nptr node, int slot, Nptr downPtr);
59 static Nptr split(Tree *B, Nptr node);
60 static void makeNewRoot(Tree *B, Nptr oldRoot, Nptr newNode);
62 static Nptr descendBalance(Tree *B, Nptr curr, Nptr left, Nptr right, Nptr lAnc, Nptr rAnc, Nptr parent);
63 static void collapseRoot(Tree *B, Nptr oldRoot, Nptr newRoot);
64 static void removeEntry(Tree *B, Nptr curr, int slot);
65 static Nptr merge(Tree *B, Nptr left, Nptr right, Nptr anchor);
66 static Nptr shift(Tree *B, Nptr left, Nptr right, Nptr anchor);
68 static void _clrentry(Nptr node, int entry);
69 static void _pushentry(Nptr node, int entry, int offset);
70 static void _pullentry(Nptr node, int entry, int offset);
71 static void _xferentry(Nptr srcNode, int srcEntry, Nptr destNode, int destEntry);
72 static void _setentry(Nptr node, int entry, keyT key, Nptr downNode);
74 /* access key and data values for B+tree methods */
75 /* pass values to getSlot(), descend...() */
76 static keyT getfunkey(Tree *B);
77 static dataT getfundata(Tree *B);
78 static void setfunkey(Tree *B, keyT v);
79 static void setfundata(Tree *B, dataT v);
83 static int _isRoot(Tree *B, Nptr n)
85 int flagset = ((n->flags & isROOT) == isROOT);
90 if (flagset && n != getroot(B))
96 static int _isFew(Tree *B, Nptr n)
98 int flagset = ((n->flags & FEWEST) == FEWEST);
99 int fanout = getminfanout(B, n);
100 int entries = numentries(n);
101 int mincnt = entries <= fanout;
106 if (flagset && !mincnt || !flagset && mincnt)
112 static int _isFull(Tree *B, Nptr n)
114 int flagset = ((n->flags & isFULL) == isFULL);
115 int maxcnt = numentries(n) == getfanout(B);
120 if (flagset && !maxcnt || !flagset && maxcnt)
125 #endif /* DEBUG_BTREE */
127 /***********************************************************************\
128 | B+tree Initialization and Cleanup Routines |
129 \***********************************************************************/
130 static DWORD TlsKeyIndex;
131 static DWORD TlsDataIndex;
133 long cm_InitBPlusDir(void)
135 if ((TlsKeyIndex = TlsAlloc()) == TLS_OUT_OF_INDEXES)
138 if ((TlsDataIndex = TlsAlloc()) == TLS_OUT_OF_INDEXES)
144 /******************** Set up B+tree structure **********************/
145 Tree *initBtree(unsigned int poolsz, unsigned int fanout, KeyCmp keyCmp)
149 dataT data = {0,0,0,0,0,0,0};
151 if (fanout > MAX_FANOUT)
154 setbplustree(B, malloc(sizeof(Tree)));
155 memset(B, 0, sizeof(Tree));
156 setfanout(B, fanout);
157 setminfanout(B, (fanout + 1) >> 1);
158 initFreeNodePool(B, poolsz);
160 setleaf(B, getFreeNode(B)); /* set up the first leaf node */
161 setroot(B, getleaf(B)); /* the root is initially the leaf */
162 setflag(getroot(B), isLEAF);
163 setflag(getroot(B), isROOT);
164 setflag(getroot(B), FEWEST);
169 setcomparekeys(B, keyCmp);
172 StringCbPrintfA(B->message, sizeof(B->message), "INIT: B+tree of fanout %d at %10p.\n", fanout, (void *)B);
173 OutputDebugString(B->message);
179 /******************** Clean up B+tree structure ********************/
181 * dirlock must be write locked
183 void freeBtree(Tree *B)
186 StringCbPrintfA(B->message, sizeof(B->message), "FREE: B+tree at %10p.\n", (void *) B);
187 OutputDebugString(B->message);
192 memset(B, 0, sizeof(*B));
197 /* access key and data values for B+tree methods */
198 /* pass values to getSlot(), descend...() */
199 static keyT getfunkey(Tree *B) {
202 // Retrieve a data pointer for the current thread.
203 tlsKey = (keyT *) TlsGetValue(TlsKeyIndex);
204 if (tlsKey == NULL) {
205 if (GetLastError() != ERROR_SUCCESS)
206 osi_panic("TlsGetValue failed", __FILE__, __LINE__);
208 osi_panic("get before set", __FILE__, __LINE__);
214 static dataT getfundata(Tree *B) {
217 // Retrieve a data pointer for the current thread.
218 tlsData = (dataT *) TlsGetValue(TlsDataIndex);
219 if (tlsData == NULL) {
220 if (GetLastError() != ERROR_SUCCESS)
221 osi_panic("TlsGetValue failed", __FILE__, __LINE__);
223 osi_panic("get before set", __FILE__, __LINE__);
229 static void setfunkey(Tree *B, keyT theKey) {
232 tlsKey = (keyT *) TlsGetValue(TlsKeyIndex);
233 if (tlsKey == NULL) {
234 if (GetLastError() != ERROR_SUCCESS)
235 osi_panic("TlsGetValue failed", __FILE__, __LINE__);
237 tlsKey = malloc(sizeof(keyT));
239 if (!TlsSetValue(TlsKeyIndex, tlsKey))
240 osi_panic("TlsSetValue failed", __FILE__, __LINE__);
246 static void setfundata(Tree *B, dataT theData) {
249 tlsData = (dataT *) TlsGetValue(TlsDataIndex);
250 if (tlsData == NULL) {
251 if (GetLastError() != ERROR_SUCCESS)
252 osi_panic("TlsGetValue failed", __FILE__, __LINE__);
254 tlsData = malloc(sizeof(dataT));
256 if (!TlsSetValue(TlsDataIndex, tlsData))
257 osi_panic("TlsSetValue failed", __FILE__, __LINE__);
264 /***********************************************************************\
265 | Find leaf node in which data nodes can be found |
266 \***********************************************************************/
268 /********************** top level lookup **********************/
269 Nptr bplus_Lookup(Tree *B, keyT key)
274 StringCbPrintfA(B->message, sizeof(B->message), "LOOKUP: key %S.\n", key.name);
275 OutputDebugString(B->message);
278 setfunkey(B, key); /* set search key */
279 leafNode = descendToLeaf(B, getroot(B)); /* start search from root node */
287 slot = getSlot(B, leafNode);
291 dataNode = getnode(leafNode, slot);
292 data = getdatavalue(dataNode);
294 StringCbPrintfA(B->message, sizeof(B->message), "LOOKUP: %S found on page %d value (%d.%d.%d).\n",
296 getnodenumber(B, leafNode),
301 StringCbPrintfA(B->message, sizeof(B->message), "LOOKUP: not found!\n");
302 OutputDebugString(B->message);
308 /********************** `recurse' down B+tree **********************/
309 static Nptr descendToLeaf(Tree *B, Nptr curr)
316 memset(prev, 0, sizeof(prev));
318 for (depth = 0, slot = getSlot(B, curr); (slot >= 0) && isinternal(curr); depth++, slot = getSlot(B, curr)) {
321 curr = getfirstnode(curr);
323 curr = getnode(curr, slot);
324 else /* BTERROR, BTLOWER, BTUPPER */ {
333 if ((slot > 0) && !comparekeys(B)(getfunkey(B), getkey(curr, slot), 0))
334 findNode = curr; /* correct key value found */
336 findNode = NONODE; /* key value not in tree */
341 /******************** find slot for search key *********************/
342 int getSlot(Tree *B, Nptr curr)
346 entries = numentries(curr); /* need this if root is ever empty */
347 slot = !entries ? 0 : findKey(B, curr, 1, entries);
353 /******************** recursive binary search **********************/
354 static int findKey(Tree *B, Nptr curr, int lo, int hi)
356 int mid, findslot = BTERROR;
359 findslot = bestMatch(B, curr, lo); /* recursion base case */
362 if (findslot == BTERROR) {
363 StringCbPrintfA(B->message, sizeof(B->message), "FINDKEY: (lo %d hi %d) Bad key ordering on node %d (0x%p)\n",
364 lo, hi, getnodenumber(B, curr), curr);
365 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
369 mid = (lo + hi) >> 1;
370 switch (findslot = bestMatch(B, curr, mid)) {
371 case BTLOWER: /* check lower half of range */
373 findslot = findKey(B, curr, lo, mid - 1); /* never in 2-3+trees */
375 case BTUPPER: /* check upper half of range */
376 if (mid < getfanout(B))
377 findslot = findKey(B, curr, mid + 1, hi);
380 StringCbPrintfA(B->message, sizeof(B->message), "FINDKEY: (lo %d hi %d) Bad key ordering on node %d (0x%p)\n",
381 lo, hi, getnodenumber(B, curr), curr);
382 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
386 if (isleaf(curr) && findslot == 0)
388 StringCbPrintfA(B->message, sizeof(B->message), "FINDKEY: (lo %d hi %d) findslot %d is invalid for leaf nodes, bad key ordering on node %d (0x%p)\n",
389 lo, hi, findslot, getnodenumber(B, curr), curr);
390 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
396 /************ comparison of key with a target key slot *************/
397 static int bestMatch(Tree *B, Nptr curr, int slot)
399 int diff, comp=2, findslot;
401 diff = comparekeys(B)(getfunkey(B), getkey(curr, slot), 0);
404 } else if (diff < 0) { /* also check previous slot */
407 findslot = BTLOWER; /* not found in the tree */
411 else if ((comp = comparekeys(B)(getfunkey(B), getkey(curr, slot - 1), 0)) >= 0) {
413 } else if (comp < diff) {
414 findslot = BTERROR; /* inconsistent ordering of keys */
419 findslot = BTLOWER; /* key must be below in node ordering */
421 } else { /* or check following slot */
422 if (slot == numentries(curr)) {
423 if (isleaf(curr) && numentries(curr) == getfanout(B))
427 } else if ((comp = comparekeys(B)(getfunkey(B), getkey(curr, slot + 1), 0)) < 0) {
429 } else if (comp == 0) {
431 } else if (comp > diff) {
432 findslot = BTERROR; /* inconsistent ordering of keys */
437 findslot = BTUPPER; /* key must be above in node ordering */
441 if (findslot == BTERROR || isleaf(curr) && findslot == 0)
443 StringCbPrintfA(B->message, sizeof(B->message), "BESTMATCH: node %d (0x%p) slot %d diff %d comp %d findslot %d\n",
444 getnodenumber(B, curr), curr, slot, diff, comp, findslot);
445 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
451 /***********************************************************************\
452 | Insert new data into tree |
453 \***********************************************************************/
456 /********************** top level insert call **********************/
457 void insert(Tree *B, keyT key, dataT data)
462 StringCbPrintfA(B->message, sizeof(B->message), "INSERT: key %S.\n", key.name);
463 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
466 setfunkey(B, key); /* set insertion key */
467 setfundata(B, data); /* a node containing data */
468 setsplitpath(B, NONODE);
469 newNode = descendSplit(B, getroot(B)); /* insertion point search from root */
470 if (newNode != getsplitpath(B)) /* indicates the root node has split */
471 makeNewRoot(B, getroot(B), newNode);
475 /***************** recurse down and split back up ******************/
477 descendSplit(Tree *B, Nptr curr)
479 Nptr downNode = NONODE, sibling = NONODE;
487 setsplitpath(B, NONODE);
488 else if (getsplitpath(B) == NONODE)
489 setsplitpath(B, curr); /* indicates where nodes must split */
491 slot = getSlot(B, curr); /* is null only if the root is empty */
498 else if (slot == BTUPPER)
502 if (isinternal(curr)) { /* continue recursion to leaves */
504 downNode = descendSplit(B, getfirstnode(curr));
506 downNode = descendSplit(B, getnode(curr, slot));
507 } else if ((slot > 0) && !comparekeys(B)(getfunkey(B), getkey(curr, slot), 0)) {
508 if (!(gettreeflags(B) & TREE_FLAG_UNIQUE_KEYS)) {
509 downNode = getDataNode(B, getfunkey(B), getfundata(B));
510 getdatanext(downNode) = getnode(curr,slot);
511 setnode(curr, slot, downNode);
514 setsplitpath(B, NONODE);
517 downNode = getDataNode(B, getfunkey(B), getfundata(B)); /* an insertion takes place */
519 if (downNode != NONODE) { /* insert only where necessary */
520 if (getsplitpath(B) != NONODE)
521 sibling = split(B, curr); /* a sibling node is prepared */
522 insertEntry(B, curr, slot, sibling, downNode);
528 /*************** determine location of inserted key ****************/
530 insertEntry(Tree *B, Nptr currNode, int slot, Nptr sibling, Nptr downPtr)
532 int split, i, j, k, x, y;
536 StringCbPrintfA(B->message, sizeof(B->message), "INSERT: slot %d, down node %d.\n", slot, getnodenumber(B, downPtr));
537 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
540 if (sibling == NONODE) { /* no split occurred */
541 placeEntry(B, currNode, slot + 1, downPtr);
543 else { /* split entries between the two */
544 if isinternal(currNode) {
546 split = getfanout(B) - getminfanout(B, currNode);
547 } else if (isroot(currNode)) {
548 /* split the root node and turn it into just a leaf */
550 split = getminfanout(B, currNode);
553 split = getminfanout(B, currNode);
555 j = (slot != split ? 1 : 0);
556 k = (slot >= split ? 1 : 0);
559 * Move entries from the top half of the current node to
560 * to the sibling node.
561 * The number of entries to move is dependent upon where
562 * the new entry is going to be added in relationship to
563 * the split slot. (slots are 1-based). If order to produce
564 * a balanced tree, if the insertion slot is greater than
565 * the split we move one less entry as the new entry will
566 * be inserted into the sibling.
568 * If the node that is being split is an internal node (i != 0)
569 * then we move one less entry due to the extra down pointer
570 * when the split slot is not equal to the insertion slot
572 for (x = split + k + j * i, y = 1; x <= getfanout(B); x++, y++) {
573 xferentry(currNode, x, sibling, y); /* copy entries to sibling */
574 clrentry(currNode, x);
575 decentries(currNode);
579 if (getkey(sibling, numentries(sibling)).name == NULL)
583 clrflag(currNode, isFULL);
584 if (numentries(currNode) == getminfanout(B, currNode))
585 setflag(currNode, FEWEST); /* never happens in even size nodes */
588 if (numentries(sibling) > getfanout(B))
591 if (numentries(sibling) == getfanout(B))
592 setflag(sibling, isFULL); /* only ever happens in 2-3+trees */
594 if (numentries(sibling) > getminfanout(B, sibling))
595 clrflag(sibling, FEWEST);
597 if (i) { /* set first pointer of internal node */
599 setfirstnode(sibling, getnode(currNode, split + k));
600 decentries(currNode);
601 if (numentries(currNode) == getminfanout(B, currNode))
602 setflag(currNode, FEWEST);
604 clrflag(currNode, FEWEST);
607 setfirstnode(sibling, downPtr);
610 if (j) { /* insert new entry into correct spot */
612 placeEntry(B, sibling, slot - split + 1 - i, downPtr);
614 placeEntry(B, currNode, slot + 1, downPtr);
616 /* set key separating nodes */
618 key = getkey(sibling, 1);
620 Nptr leaf = getfirstnode(sibling);
621 while ( isinternal(leaf) )
622 leaf = getfirstnode(leaf);
623 key = getkey(leaf, 1);
628 placeEntry(B, sibling, 1, downPtr);
632 /************ place key into appropriate node & slot ***************/
634 placeEntry(Tree *B, Nptr node, int slot, Nptr downPtr)
644 if (numentries(node) != 0 && getkey(node, numentries(node)).name == NULL)
647 for (x = numentries(node); x >= slot && x != 0; x--) /* make room for new entry */
648 pushentry(node, x, 1);
649 setentry(node, slot, getfunkey(B), downPtr);/* place it in correct slot */
651 incentries(node); /* adjust entry counter */
653 if (getkey(node, numentries(node)).name == NULL)
657 if (numentries(node) == getfanout(B))
658 setflag(node, isFULL);
659 if (numentries(node) > getminfanout(B, node))
660 clrflag(node, FEWEST);
662 setflag(node, FEWEST);
666 /***************** split full node and set flags *******************/
668 split(Tree *B, Nptr node)
672 sibling = getFreeNode(B);
674 setflag(sibling, FEWEST); /* set up node flags */
677 setflag(sibling, isLEAF);
678 setnextnode(sibling, getnextnode(node));/* adjust leaf pointers */
679 setnextnode(node, sibling);
681 if (getsplitpath(B) == node)
682 setsplitpath(B, NONODE); /* no more splitting needed */
685 clrflag(node, isROOT);
691 /********************** build new root node ************************/
693 makeNewRoot(Tree *B, Nptr oldRoot, Nptr newNode)
695 setroot(B, getFreeNode(B));
697 setfirstnode(getroot(B), oldRoot); /* old root becomes new root's child */
698 setentry(getroot(B), 1, getfunkey(B), newNode); /* old root's sibling also */
699 incentries(getroot(B));
701 if (numentries(getroot(B)) > getfanout(B))
705 /* the oldRoot's isROOT flag was cleared in split() */
706 setflag(getroot(B), isROOT);
707 setflag(getroot(B), FEWEST);
708 clrflag(getroot(B), isLEAF);
713 /***********************************************************************\
714 | Delete data from tree |
715 \***********************************************************************/
717 /********************** top level delete call **********************\
719 | The recursive call for deletion carries 5 additional parameters
720 | which may be needed to rebalance the B+tree when removing the key.
721 | These parameters are:
722 | 1. immediate left neighbor of the current node
723 | 2. immediate right neighbor of the current node
724 | 3. the anchor of the current node and left neighbor
725 | 4. the anchor of the current node and right neighbor
726 | 5. the parent of the current node
728 | All of these parameters are simple to calculate going along the
729 | recursive path to the leaf nodes and the point of key deletion.
730 | At that time, the algorithm determines which node manipulations
731 | are most efficient, that is, cause the least rearranging of data,
732 | and minimize the need for non-local key manipulation.
734 \***********************************************************************/
735 void delete(Tree *B, keyT key)
740 StringCbPrintfA(B->message, sizeof(B->message), "DELETE: key %S.\n", key.name);
741 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
744 setfunkey(B, key); /* set deletion key */
745 setmergepath(B, NONODE);
746 newNode = descendBalance(B, getroot(B), NONODE, NONODE, NONODE, NONODE, NONODE);
747 if (isnode(newNode)) {
749 StringCbPrintfA(B->message, sizeof(B->message), "DELETE: collapsing node %d", getnodenumber(B, newNode));
750 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
752 collapseRoot(B, getroot(B), newNode); /* remove root when superfluous */
757 /********************** remove old root node ***********************/
759 collapseRoot(Tree *B, Nptr oldRoot, Nptr newRoot)
763 StringCbPrintfA(B->message, sizeof(B->message), "COLLAPSE: old %d, new %d.\n", getnodenumber(B, oldRoot), getnodenumber(B, newRoot));
764 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
765 showNode(B, "collapseRoot oldRoot", oldRoot);
766 showNode(B, "collapseRoot newRoot", newRoot);
770 setflag(newRoot, isROOT);
771 clrflag(newRoot, FEWEST);
772 putFreeNode(B, oldRoot);
773 dectreeheight(B); /* the height of the tree decreases */
777 /**************** recurse down and balance back up *****************/
779 descendBalance(Tree *B, Nptr curr, Nptr left, Nptr right, Nptr lAnc, Nptr rAnc, Nptr parent)
781 Nptr newMe=NONODE, myLeft=NONODE, myRight=NONODE, lAnchor=NONODE, rAnchor=NONODE, newNode=NONODE;
782 int slot = 0, notleft = 0, notright = 0, fewleft = 0, fewright = 0, test = 0;
785 StringCbPrintfA(B->message, sizeof(B->message), "descendBalance curr %d, left %d, right %d, lAnc %d, rAnc %d, parent %d\n",
786 curr ? getnodenumber(B, curr) : -1,
787 left ? getnodenumber(B, left) : -1,
788 right ? getnodenumber(B, right) : -1,
789 lAnc ? getnodenumber(B, lAnc) : -1,
790 rAnc ? getnodenumber(B, rAnc) : -1,
791 parent ? getnodenumber(B, parent) : -1);
792 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
796 setmergepath(B,NONODE);
797 else if (getmergepath(B) == NONODE)
798 setmergepath(B, curr); /* mark which nodes may need rebalancing */
800 slot = getSlot(B, curr);
807 else if (slot == BTUPPER)
811 if (isinternal(curr)) /* set up next recursion call's parameters */
814 newNode = getfirstnode(curr);
815 myLeft = !isnode(left) ? NONODE : getlastnode(left);
819 newNode = getnode(curr, slot);
821 myLeft = getfirstnode(curr);
823 myLeft = getnode(curr, slot - 1);
827 if (slot == numentries(curr)) {
828 myRight = !isnode(right) ? NONODE : getfirstnode(right);
832 myRight = getnode(curr, slot + 1);
835 newMe = descendBalance(B, newNode, myLeft, myRight, lAnchor, rAnchor, curr);
837 else if ((slot > 0) && !comparekeys(B)(getfunkey(B), getkey(curr, slot), 0))
843 newNode = getnode(curr, slot);
844 next = getdatanext(newNode);
847 * We only delete exact matches.
849 if (!comparekeys(B)(getfunkey(B), getdatakey(newNode), EXACT_MATCH)) {
850 /* exact match, free the first entry */
851 setnode(curr, slot, next);
853 if (next == NONODE) {
854 /* delete this key as there are no more data values */
857 /* otherwise, there are more and we leave the key in place */
858 setnode(curr, slot, next);
859 putFreeNode(B, newNode);
861 /* but do not delete the key */
863 setmergepath(B, NONODE);
865 } else if (next == NONODE) {
867 * we didn't find an exact match and there are no more
868 * choices. so we leave it alone and remove nothing.
871 setmergepath(B, NONODE);
873 /* The first data node doesn't match but there are other
874 * options. So we must determine if any of the next nodes
875 * are the one we are looking for.
880 if (!comparekeys(B)(getfunkey(B), getdatakey(next), EXACT_MATCH)) {
881 /* we found the one to delete */
882 getdatanext(prev) = getdatanext(next);
883 putFreeNode(B, next);
887 next = getdatanext(next);
890 /* do not delete the key */
892 setmergepath(B, NONODE);
897 newMe = NONODE; /* no deletion possible, key not found */
898 setmergepath(B, NONODE);
901 /***************** rebalancing tree after deletion *****************\
903 | The simplest B+tree rebalancing consists of the following rules.
905 | If a node underflows:
906 | CASE 1 check if it is the root, and collapse it if it is,
907 | CASE 2 otherwise, check if both of its neighbors are minimum
908 | sized and merge the underflowing node with one of them,
909 | CASE 3 otherwise shift surplus entries to the underflowing node.
911 | The choice of which neighbor to use is optional. However, the
912 | rebalancing rules that follow also ensure whenever possible
913 | that the merges and shifts which do occur use a neighbor whose
914 | anchor is the parent of the underflowing node.
916 | Cases 3, 4, 5 below are more an optimization than a requirement,
917 | and can be omitted, with a change of the action value in case 6,
918 | which actually corresponds to the third case described above.
920 \***********************************************************************/
922 /* begin deletion, working upwards from leaves */
924 if (newMe != NONODE) { /* this node removal doesn't consider duplicates */
926 StringCbPrintfA(B->message, sizeof(B->message), "descendBalance DELETE: slot %d, node %d.\n", slot, getnodenumber(B, curr));
927 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
930 removeEntry(B, curr, slot + (newMe != newNode)); /* removes one of two */
933 showNode(B, "descendBalance curr", curr);
937 if (getmergepath(B) == NONODE)
939 else { /* tree rebalancing rules for node merges and shifts */
940 notleft = !isnode(left);
941 notright = !isnode(right);
943 fewleft = isfew(left); /* only used when defined */
945 fewright = isfew(right);
947 /* CASE 1: prepare root node (curr) for removal */
948 if (notleft && notright) {
949 test = isleaf(curr); /* check if B+tree has become empty */
950 newNode = test ? NONODE : getfirstnode(curr);
952 /* CASE 2: the merging of two nodes is a must */
953 else if ((notleft || fewleft) && (notright || fewright)) {
954 test = (lAnc != parent);
955 newNode = test ? merge(B, curr, right, rAnc) : merge(B, left, curr, lAnc);
957 /* CASE 3: choose the better of a merge or a shift */
958 else if (!notleft && fewleft && !notright && !fewright) {
959 test = (rAnc != parent) && (curr == getmergepath(B));
960 newNode = test ? merge(B, left, curr, lAnc) : shift(B, curr, right, rAnc);
962 /* CASE 4: also choose between a merge or a shift */
963 else if (!notleft && !fewleft && !notright && fewright) {
964 test = !(lAnc == parent) && (curr == getmergepath(B));
965 newNode = test ? merge(B, curr, right, rAnc) : shift(B, left, curr, lAnc);
967 /* CASE 5: choose the more effective of two shifts */
968 else if (lAnc == rAnc) { /* => both anchors are the parent */
969 test = (numentries(left) <= numentries(right));
970 newNode = test ? shift(B, curr, right, rAnc) : shift(B, left, curr, lAnc);
972 /* CASE 6: choose the shift with more local effect */
973 else { /* if omitting cases 3,4,5 use below */
974 test = (lAnc == parent); /* test = (!notleft && !fewleft); */
975 newNode = test ? shift(B, left, curr, lAnc) : shift(B, curr, right, rAnc);
980 StringCbPrintfA(B->message, sizeof(B->message), "descendBalance returns %d\n", getnodenumber(B, newNode));
981 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
987 /**************** remove key and pointer from node *****************/
989 removeEntry(Tree *B, Nptr curr, int slot)
993 putFreeNode(B, getnode(curr, slot)); /* return deleted node to free list */
994 for (x = slot; x < numentries(curr); x++)
995 pullentry(curr, x, 1); /* adjust node with removed key */
997 clrflag(curr, isFULL); /* keep flag information up to date */
998 if (numentries(curr) > getminfanout(B, curr))
999 clrflag(curr, FEWEST);
1001 setflag(curr, FEWEST);
1005 /******* merge a node pair & set emptied node up for removal *******/
1007 merge(Tree *B, Nptr left, Nptr right, Nptr anchor)
1012 StringCbPrintfA(B->message, sizeof(B->message), "MERGE: left %d, right %d.\n", getnodenumber(B, left), getnodenumber(B, right));
1013 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1014 showNode(B, "pre-merge anchor", anchor);
1015 showNode(B, "pre-merge left", left);
1016 showNode(B, "pre-merge right", right);
1019 if (isinternal(left)) {
1020 incentries(left); /* copy key separating the nodes */
1022 if (numentries(left) > getfanout(B))
1025 setfunkey(B, getkey(right, 1)); /* defined but maybe just deleted */
1026 z = getSlot(B, anchor); /* needs the just calculated key */
1029 setfunkey(B, getkey(anchor, z)); /* set slot to delete in anchor */
1030 setentry(left, numentries(left), getfunkey(B), getfirstnode(right));
1033 setnextnode(left, getnextnode(right));
1035 for (x = numentries(left) + 1, y = 1; y <= numentries(right); x++, y++) {
1038 if (numentries(left) > getfanout(B))
1041 xferentry(right, y, left, x); /* transfer entries to left node */
1043 clearentries(right);
1045 if (numentries(left) > getminfanout(B, left))
1046 clrflag(left, FEWEST);
1047 if (numentries(left) == getfanout(B))
1048 setflag(left, isFULL); /* never happens in even size nodes */
1050 if (getmergepath(B) == left || getmergepath(B) == right)
1051 setmergepath(B, NONODE); /* indicate rebalancing is complete */
1054 showNode(B, "post-merge anchor", anchor);
1055 showNode(B, "post-merge left", left);
1056 showNode(B, "post-merge right", right);
1062 /****** shift entries in a node pair & adjust anchor key value *****/
1064 shift(Tree *B, Nptr left, Nptr right, Nptr anchor)
1069 StringCbPrintfA(B->message, sizeof(B->message), "SHIFT: left %d, right %d, anchor %d.\n",
1070 getnodenumber(B, left),
1071 getnodenumber(B, right),
1072 getnodenumber(B, anchor));
1073 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1074 showNode(B, "pre-shift anchor", anchor);
1075 showNode(B, "pre-shift left", left);
1076 showNode(B, "pre-shift right", right);
1079 i = isinternal(left);
1081 if (numentries(left) < numentries(right)) { /* shift entries to left */
1082 y = (numentries(right) - numentries(left)) >> 1;
1083 x = numentries(left) + y;
1084 setfunkey(B, getkey(right, y + 1 - i)); /* set new anchor key value */
1085 z = getSlot(B, anchor); /* find slot in anchor node */
1089 if (z == 0 && !isroot(anchor))
1092 if (i) { /* move out old anchor value */
1093 decentries(right); /* adjust for shifting anchor */
1096 if (numentries(left) > getfanout(B))
1099 setentry(left, numentries(left), getkey(anchor, z), getfirstnode(right));
1100 setfirstnode(right, getnode(right, y + 1 - i));
1102 clrflag(right, isFULL);
1103 setkey(anchor, z, getfunkey(B)); /* set new anchor value */
1104 for (z = y, y -= i; y > 0; y--, x--) {
1105 decentries(right); /* adjust entry count */
1108 if (numentries(left) > getfanout(B))
1111 xferentry(right, y, left, x); /* transfer entries over */
1114 for (x = 1; x <= numentries(right); x++) /* adjust reduced node */
1115 pullentry(right, x, z);
1117 else if (numentries(left) > numentries(right)) { /* shift entries to right */
1118 y = (numentries(left) - numentries(right)) >> 1;
1119 x = numentries(left) - y + 1;
1121 for (z = numentries(right); z > 0; z--) /* adjust increased node */
1122 pushentry(right, z, y);
1124 setfunkey(B, getkey(left, x)); /* set new anchor key value */
1125 z = getSlot(B, anchor);
1134 if (numentries(right) > getfanout(B))
1137 setentry(right, y, getkey(anchor, z), getfirstnode(right));
1138 setfirstnode(right, getnode(left, x));
1140 clrflag(left, isFULL);
1141 setkey(anchor, z, getfunkey(B));
1142 for (x = numentries(left) + i, y -= i; y > 0; y--, x--) {
1146 if (numentries(right) > getfanout(B))
1149 xferentry(left, x, right, y); /* transfer entries over */
1157 #endif /* DEBUG_BTREE */
1159 if (numentries(left) > getminfanout(B, left)) /* adjust node flags */
1160 clrflag(left, FEWEST); /* never happens in 2-3+trees */
1162 setflag(left, FEWEST);
1163 if (numentries(right) > getminfanout(B, right))
1164 clrflag(right, FEWEST); /* never happens in 2-3+trees */
1166 setflag(right, FEWEST);
1167 setmergepath(B, NONODE);
1170 StringCbPrintfA(B->message, sizeof(B->message), "SHIFT: left %d, right %d.\n", getnodenumber(B, left), getnodenumber(B, right));
1171 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1172 showNode(B, "post-shift anchor", anchor);
1173 showNode(B, "post-shift left", left);
1174 showNode(B, "post-shift right", right);
1182 _clrentry(Nptr node, int entry)
1184 if (getkey(node,entry).name != NULL) {
1185 free(getkey(node,entry).name);
1186 getkey(node,entry).name = NULL;
1188 getnode(node,entry) = NONODE;
1192 _pushentry(Nptr node, int entry, int offset)
1194 if (getkey(node,entry + offset).name != NULL)
1195 free(getkey(node,entry + offset).name);
1200 getkey(node,entry + offset).name = cm_NormStrDup(getkey(node,entry).name);
1202 if ( getnode(node, entry) == NONODE )
1205 getnode(node,entry + offset) = getnode(node,entry);
1209 _pullentry(Nptr node, int entry, int offset)
1211 if (getkey(node,entry).name != NULL)
1212 free(getkey(node,entry).name);
1213 getkey(node,entry).name = cm_NormStrDup(getkey(node,entry + offset).name);
1215 if ( getnode(node, entry + offset) == NONODE )
1218 getnode(node,entry) = getnode(node,entry + offset);
1222 _xferentry(Nptr srcNode, int srcEntry, Nptr destNode, int destEntry)
1224 if (getkey(destNode,destEntry).name != NULL)
1225 free(getkey(destNode,destEntry).name);
1226 getkey(destNode,destEntry).name = cm_NormStrDup(getkey(srcNode,srcEntry).name);
1228 if ( getnode(srcNode, srcEntry) == NONODE )
1231 getnode(destNode,destEntry) = getnode(srcNode,srcEntry);
1235 _setentry(Nptr node, int entry, keyT key, Nptr downNode)
1237 if (getkey(node,entry).name != NULL)
1238 free(getkey(node,entry).name);
1239 getkey(node,entry).name = cm_NormStrDup(key.name);
1241 if ( downNode == NONODE )
1244 getnode(node,entry) = downNode;
1248 /***********************************************************************\
1249 | Empty Node Utilities |
1250 \***********************************************************************/
1252 /********************* Set up initial pool of free nodes ***********/
1254 initFreeNodePool(Tree *B, int quantity)
1259 setfirstallnode(B, NONODE);
1260 setfirstfreenode(B, NONODE);
1262 for (i = 0, p = NONODE; i < quantity; i++) {
1263 n = malloc(sizeof(*n));
1264 memset(n, 0, sizeof(*n));
1265 setnodenumber(B,n,i);
1268 setnextnode(p, n); /* insert node into free node list */
1271 setfirstfreenode(B, n);
1272 setfirstallnode(B, n);
1276 setnextnode(p, NONODE); /* indicates end of free node list */
1277 setallnode(p, NONODE); /* indicates end of all node list */
1279 setpoolsize(B, quantity);
1283 /******************* Cleanup Free Node Pool **************************/
1286 cleanupNodePool(Tree *B)
1291 for ( i=0, node = getfirstallnode(B); node != NONODE && i<getpoolsize(B); node = next, i++ ) {
1293 if ( getdatakey(node).name ) {
1294 free(getdatakey(node).name);
1295 getdatakey(node).name = NULL;
1297 if ( getdatavalue(node).cname ) {
1298 free(getdatavalue(node).cname);
1299 getdatavalue(node).cname = NULL;
1301 if ( getdatavalue(node).fsname ) {
1302 free(getdatavalue(node).fsname);
1303 getdatavalue(node).fsname = NULL;
1305 } else { /* data node */
1306 for ( j=1; j<=getfanout(B); j++ ) {
1307 if (getkey(node, j).name)
1308 free(getkey(node, j).name);
1311 next = getallnode(node);
1316 /************** take a free B+tree node from the pool **************/
1318 getFreeNode(Tree *B)
1320 Nptr newNode = getfirstfreenode(B);
1322 if (newNode != NONODE) {
1323 setfirstfreenode(B, getnextnode(newNode)); /* adjust free node list */
1324 setnextnode(newNode, NONODE); /* remove node from list */
1327 newNode = malloc(sizeof(*newNode));
1328 memset(newNode, 0, sizeof(*newNode));
1330 setallnode(newNode, getfirstallnode(B));
1331 setfirstallnode(B, newNode);
1332 setnodenumber(B, newNode, getpoolsize(B));
1333 setpoolsize(B, getpoolsize(B) + 1);
1336 clearflags(newNode); /* Sets MAGIC */
1341 /************* return a deleted B+tree node to the pool ************/
1343 putFreeNode(Tree *B, Nptr node)
1351 if ( getdatakey(node).name )
1352 free(getdatakey(node).name);
1353 if ( getdatavalue(node).cname )
1354 free(getdatavalue(node).cname);
1355 if ( getdatavalue(node).fsname )
1356 free(getdatavalue(node).fsname);
1357 } else { /* data node */
1358 for ( i=1; i<=getfanout(B); i++ ) {
1359 if (getkey(node, i).name)
1360 free(getkey(node, i).name);
1364 /* free nodes do not have MAGIC set */
1365 memset(&nAdr(node), 0, sizeof(nAdr(node)));
1366 setnextnode(node, getfirstfreenode(B)); /* add node to list */
1367 setfirstfreenode(B, node); /* set it to be list head */
1371 /******* fill a free data node with a key and associated data ******/
1373 getDataNode(Tree *B, keyT key, dataT data)
1375 Nptr newNode = getFreeNode(B);
1377 setflag(newNode, isDATA);
1378 getdatakey(newNode).name = cm_NormStrDup(key.name);
1379 getdatavalue(newNode) = data;
1380 getdatanext(newNode) = NONODE;
1387 /***********************************************************************\
1388 | B+tree Printing Utilities |
1389 \***********************************************************************/
1391 /*********************** B+tree node printer ***********************/
1392 void showNode(Tree *B, const char * where, Nptr n)
1396 StringCbPrintfA(B->message, sizeof(B->message), "- -- -- -- -- -- -- -- -- -- -- -- -\n");
1397 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1398 StringCbPrintfA(B->message, sizeof(B->message), "| %-20s |\n", where);
1399 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1400 StringCbPrintfA(B->message, sizeof(B->message), "| node %6d ", getnodenumber(B, n));
1401 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1402 StringCbPrintfA(B->message, sizeof(B->message), " magic %4x |\n", getmagic(n));
1403 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1404 StringCbPrintfA(B->message, sizeof(B->message), "- -- -- -- -- -- -- -- -- -- -- -- -\n");
1405 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1406 StringCbPrintfA(B->message, sizeof(B->message), "| flags %1d%1d%1d%1d ", isfew(n), isfull(n), isroot(n), isleaf(n));
1407 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1408 StringCbPrintfA(B->message, sizeof(B->message), "| keys = %5d ", numentries(n));
1409 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1410 StringCbPrintfA(B->message, sizeof(B->message), "| node = %6d |\n", getnodenumber(B, getfirstnode(n)));
1411 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1412 for (x = 1; x <= numentries(n); x++) {
1413 StringCbPrintfA(B->message, sizeof(B->message), "| entry %6d ", x);
1414 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1415 StringCbPrintfA(B->message, sizeof(B->message), "| key = %6S ", getkey(n, x).name);
1416 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1417 StringCbPrintfA(B->message, sizeof(B->message), "| node = %6d |\n", getnodenumber(B, getnode(n, x)));
1418 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1420 StringCbPrintfA(B->message, sizeof(B->message), "- -- -- -- -- -- -- -- -- -- -- -- -\n");
1421 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1424 /****************** B+tree class variable printer ******************/
1425 void showBtree(Tree *B)
1427 StringCbPrintfA(B->message, sizeof(B->message), "- -- -- -- -- -- -\n");
1428 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1429 StringCbPrintfA(B->message, sizeof(B->message), "| B+tree %10p |\n", (void *) B);
1430 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1431 StringCbPrintfA(B->message, sizeof(B->message), "- -- -- -- -- -- -\n");
1432 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1433 StringCbPrintfA(B->message, sizeof(B->message), "| root %6d |\n", getnodenumber(B, getroot(B)));
1434 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1435 StringCbPrintfA(B->message, sizeof(B->message), "| leaf %6d |\n", getnodenumber(B, getleaf(B)));
1436 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1437 StringCbPrintfA(B->message, sizeof(B->message), "| fanout %3d |\n", getfanout(B) + 1);
1438 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1439 StringCbPrintfA(B->message, sizeof(B->message), "| minfanout %3d |\n", getminfanout(B, getroot(B)) + 1);
1440 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1441 StringCbPrintfA(B->message, sizeof(B->message), "| height %3d |\n", gettreeheight(B));
1442 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1443 StringCbPrintfA(B->message, sizeof(B->message), "| freenode %6d |\n", getnodenumber(B, getfirstfreenode(B)));
1444 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1445 StringCbPrintfA(B->message, sizeof(B->message), "| theKey %6s |\n", getfunkey(B).name);
1446 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1447 StringCbPrintfA(B->message, sizeof(B->message), "| theData %s (%d.%d.%d) |\n", getfundata(B).fsname, getfundata(B).fid.volume,
1448 getfundata(B).fid.vnode, getfundata(B).fid.unique);
1449 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1450 StringCbPrintfA(B->message, sizeof(B->message), "- -- -- -- -- -- -\n");
1451 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1455 listBtreeNodes(Tree *B, const char * parent_desc, Nptr node)
1461 if (isntnode(node)) {
1462 StringCbPrintfA(B->message, sizeof(B->message), "%s - NoNode!!!\n");
1463 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1469 data = getdatavalue(node);
1470 StringCbPrintfA(B->message, sizeof(B->message), "%s - data node %d (%d.%d.%d)\n",
1471 parent_desc, getnodenumber(B, node),
1472 data.fid.volume, data.fid.vnode, data.fid.unique);
1473 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1476 showNode(B, parent_desc, node);
1478 if ( isinternal(node) || isroot(node) ) {
1479 StringCbPrintfA(thisnode, sizeof(thisnode), "parent %6d", getnodenumber(B , node));
1481 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1482 for ( i= isinternal(node) ? 0 : 1; i <= numentries(node); i++ ) {
1483 listBtreeNodes(B, thisnode, i == 0 ? getfirstnode(node) : getnode(node, i));
1488 /*********************** B+tree data printer ***********************/
1490 listBtreeValues(Tree *B, Nptr n, int num)
1496 for (slot = 1; (n != NONODE) && num && numentries(n); num--) {
1497 if (comparekeys(B)(getkey(n, slot),prev, 0) < 0) {
1498 StringCbPrintfA(B->message, sizeof(B->message), "BOMB %8s\n", getkey(n, slot).name);
1499 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1502 prev = getkey(n, slot);
1503 data = getdatavalue(getnode(n, slot));
1504 StringCbPrintfA(B->message, sizeof(B->message), "%8S (%d.%d.%d)\n",
1505 prev.name, data.fid.volume, data.fid.vnode, data.fid.unique);
1506 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1507 if (++slot > numentries(n))
1508 n = getnextnode(n), slot = 1;
1510 StringCbPrintfA(B->message, sizeof(B->message), "\n\n");
1511 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1514 /******************** entire B+tree data printer *******************/
1516 listAllBtreeValues(Tree *B)
1518 listBtreeValues(B, getleaf(B), BTERROR);
1520 #endif /* DEBUG_BTREE */
1523 findAllBtreeValues(Tree *B)
1526 Nptr n = getleaf(B), l;
1530 for (slot = 1; (n != NONODE) && num && numentries(n); num--) {
1531 if (comparekeys(B)(getkey(n, slot),prev, 0) < 0) {
1532 StringCbPrintfA(B->message, sizeof(B->message),"BOMB %8s\n", getkey(n, slot).name);
1533 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1538 prev = getkey(n, slot);
1539 l = bplus_Lookup(B, prev);
1542 StringCbPrintfA(B->message, sizeof(B->message),"BOMB %8S cannot be found\n", prev.name);
1544 StringCbPrintfA(B->message, sizeof(B->message),"BOMB lookup(%8S) finds wrong node\n", prev.name);
1545 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, B->message));
1551 if (++slot > numentries(n))
1552 n = getnextnode(n), slot = 1;
1557 * the return must be -1, 0, or 1. stricmp() in MSVC 8.0
1558 * does not return only those values.
1560 * the sorting of the tree is by case insensitive sort order
1561 * therefore, unless the strings actually match via a case
1562 * insensitive search do we want to perform the case sensitive
1563 * match. Otherwise, the search order might be considered
1564 * to be inconsistent when the EXACT_MATCH flag is set.
1567 cm_BPlusCompareNormalizedKeys(keyT key1, keyT key2, int flags)
1571 comp = cm_NormStrCmpI(key1.name, key2.name);
1572 if (comp == 0 && (flags & EXACT_MATCH))
1573 comp = cm_NormStrCmp(key1.name, key2.name);
1574 return (comp < 0 ? -1 : (comp > 0 ? 1 : 0));
1578 cm_BPlusDirLookupOriginalName(cm_dirOp_t * op, clientchar_t *centry,
1579 fschar_t **fsnameRetp)
1583 Nptr leafNode = NONODE;
1584 LARGE_INTEGER start, end;
1585 fschar_t * fsname = NULL;
1586 normchar_t * entry = NULL;
1588 if (op->scp->dirBplus == NULL ||
1589 op->dataVersion > op->scp->dirDataVersion) {
1594 entry = cm_ClientStringToNormStringAlloc(centry, -1, NULL);
1601 lock_AssertAny(&op->scp->dirlock);
1603 QueryPerformanceCounter(&start);
1605 leafNode = bplus_Lookup(op->scp->dirBplus, key);
1606 if (leafNode != NONODE) {
1608 Nptr firstDataNode, dataNode, nextDataNode;
1612 /* Found a leaf that matches the key via a case-insensitive
1613 * match. There may be one or more data nodes that match.
1614 * If we have an exact match, return that.
1615 * If we have an ambiguous match, return an error.
1616 * If we have only one inexact match, return that.
1618 slot = getSlot(op->scp->dirBplus, leafNode);
1619 if (slot <= BTERROR) {
1620 op->scp->dirDataVersion = CM_SCACHE_VERSION_BAD;
1621 rc = (slot == BTERROR ? EINVAL : ENOENT);
1624 firstDataNode = getnode(leafNode, slot);
1626 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
1628 if (!comparekeys(op->scp->dirBplus)(key, getdatakey(dataNode), EXACT_MATCH) ) {
1632 nextDataNode = getdatanext(dataNode);
1636 fsname = getdatavalue(dataNode).fsname;
1638 bplus_lookup_hits++;
1639 } else if (count == 1) {
1640 fsname = getdatavalue(firstDataNode).fsname;
1641 rc = CM_ERROR_INEXACT_MATCH;
1642 bplus_lookup_hits_inexact++;
1644 rc = CM_ERROR_AMBIGUOUS_FILENAME;
1645 bplus_lookup_ambiguous++;
1649 bplus_lookup_misses++;
1653 *fsnameRetp = cm_FsStrDup(fsname);
1655 QueryPerformanceCounter(&end);
1657 bplus_lookup_time += (end.QuadPart - start.QuadPart);
1667 /* Look up a file name in directory.
1670 op->scp->dirlock is read locked
1673 op->scp->dirlock is read locked
1676 cm_BPlusDirLookup(cm_dirOp_t * op, clientchar_t * centry, cm_fid_t * cfid)
1679 normchar_t * entry = NULL;
1681 Nptr leafNode = NONODE;
1682 LARGE_INTEGER start, end;
1684 if (op->scp->dirBplus == NULL ||
1685 op->dataVersion > op->scp->dirDataVersion) {
1690 entry = cm_ClientStringToNormStringAlloc(centry, -1, NULL);
1697 lock_AssertAny(&op->scp->dirlock);
1699 QueryPerformanceCounter(&start);
1701 leafNode = bplus_Lookup(op->scp->dirBplus, key);
1702 if (leafNode != NONODE) {
1704 Nptr firstDataNode, dataNode, nextDataNode;
1708 /* Found a leaf that matches the key via a case-insensitive
1709 * match. There may be one or more data nodes that match.
1710 * If we have an exact match, return that.
1711 * If we have an ambiguous match, return an error.
1712 * If we have only one inexact match, return that.
1714 slot = getSlot(op->scp->dirBplus, leafNode);
1715 if (slot <= BTERROR) {
1716 op->scp->dirDataVersion = 0;
1717 rc = (slot == BTERROR ? EINVAL : ENOENT);
1720 firstDataNode = getnode(leafNode, slot);
1722 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
1724 if (!comparekeys(op->scp->dirBplus)(key, getdatakey(dataNode), EXACT_MATCH) ) {
1728 nextDataNode = getdatanext(dataNode);
1732 *cfid = getdatavalue(dataNode).fid;
1734 bplus_lookup_hits++;
1735 } else if (count == 1) {
1736 *cfid = getdatavalue(firstDataNode).fid;
1737 rc = CM_ERROR_INEXACT_MATCH;
1738 bplus_lookup_hits_inexact++;
1740 rc = CM_ERROR_AMBIGUOUS_FILENAME;
1741 bplus_lookup_ambiguous++;
1745 bplus_lookup_misses++;
1748 QueryPerformanceCounter(&end);
1750 bplus_lookup_time += (end.QuadPart - start.QuadPart);
1762 op->scp->dirlock is write locked
1765 op->scp->dirlock is write locked
1767 long cm_BPlusDirCreateEntry(cm_dirOp_t * op, clientchar_t * entry, cm_fid_t * cfid)
1772 LARGE_INTEGER start, end;
1773 normchar_t * normalizedName = NULL;
1775 if (op->scp->dirBplus == NULL ||
1776 op->dataVersion != op->scp->dirDataVersion) {
1781 normalizedName = cm_ClientStringToNormStringAlloc(entry, -1, NULL);
1782 if (!normalizedName) {
1786 key.name = normalizedName;
1788 lock_AssertWrite(&op->scp->dirlock);
1790 cm_SetFid(&data.fid, cfid->cell, cfid->volume, cfid->vnode, cfid->unique);
1791 data.cname = cm_ClientStrDup(entry);
1792 data.fsname = cm_ClientStringToFsStringAlloc(entry, -1, NULL);
1793 data.shortform = FALSE;
1795 QueryPerformanceCounter(&start);
1796 bplus_create_entry++;
1798 insert(op->scp->dirBplus, key, data);
1800 if (cm_shortNames && !cm_Is8Dot3(entry)) {
1802 clientchar_t wshortName[13];
1804 dfid.vnode = htonl(data.fid.vnode);
1805 dfid.unique = htonl(data.fid.unique);
1807 cm_Gen8Dot3NameIntW(entry, &dfid, wshortName, NULL);
1809 key.name = wshortName;
1811 data.cname = cm_ClientStrDup(entry);
1812 data.fsname = cm_ClientStringToFsStringAlloc(entry, -1, NULL);
1813 data.shortform = TRUE;
1815 insert(op->scp->dirBplus, key, data);
1818 QueryPerformanceCounter(&end);
1820 bplus_create_time += (end.QuadPart - start.QuadPart);
1824 if (normalizedName != NULL)
1825 free(normalizedName);
1832 op->scp->dirlock is write locked
1835 op->scp->dirlock is write locked
1837 int cm_BPlusDirDeleteEntry(cm_dirOp_t * op, clientchar_t *centry)
1841 Nptr leafNode = NONODE;
1842 LARGE_INTEGER start, end;
1843 normchar_t * normalizedEntry = NULL;
1845 if (op->scp->dirBplus == NULL ||
1846 op->dataVersion != op->scp->dirDataVersion) {
1851 normalizedEntry = cm_ClientStringToNormStringAlloc(centry, -1, NULL);
1852 if (!normalizedEntry) {
1856 key.name = normalizedEntry;
1858 lock_AssertWrite(&op->scp->dirlock);
1860 QueryPerformanceCounter(&start);
1862 bplus_remove_entry++;
1864 if (op->scp->dirBplus) {
1865 if (!cm_Is8Dot3(centry)) {
1868 clientchar_t shortName[13];
1870 leafNode = bplus_Lookup(op->scp->dirBplus, key);
1871 if (leafNode != NONODE) {
1873 Nptr firstDataNode, dataNode, nextDataNode;
1877 /* Found a leaf that matches the key via a case-insensitive
1878 * match. There may be one or more data nodes that match.
1879 * If we have an exact match, return that.
1880 * If we have an ambiguous match, return an error.
1881 * If we have only one inexact match, return that.
1883 slot = getSlot(op->scp->dirBplus, leafNode);
1884 if (slot <= BTERROR) {
1885 op->scp->dirDataVersion = 0;
1889 firstDataNode = getnode(leafNode, slot);
1891 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
1893 if (!comparekeys(op->scp->dirBplus)(key, getdatakey(dataNode), EXACT_MATCH) ) {
1897 nextDataNode = getdatanext(dataNode);
1901 fid = getdatavalue(dataNode).fid;
1903 } else if (count == 1) {
1904 fid = getdatavalue(firstDataNode).fid;
1905 rc = CM_ERROR_INEXACT_MATCH;
1907 rc = CM_ERROR_AMBIGUOUS_FILENAME;
1910 if (rc != CM_ERROR_AMBIGUOUS_FILENAME) {
1911 /* delete first the long name and then the short name */
1912 delete(op->scp->dirBplus, key);
1914 if (cm_shortNames) {
1915 dfid.vnode = htonl(fid.vnode);
1916 dfid.unique = htonl(fid.unique);
1917 cm_Gen8Dot3NameIntW(centry, &dfid, shortName, NULL);
1919 key.name = shortName;
1920 delete(op->scp->dirBplus, key);
1925 clientchar_t * cname = NULL;
1927 /* We need to lookup the 8dot3 name to determine what the
1928 * matching long name is
1930 leafNode = bplus_Lookup(op->scp->dirBplus, key);
1931 if (leafNode != NONODE) {
1933 Nptr firstDataNode, dataNode, nextDataNode;
1937 /* Found a leaf that matches the key via a case-insensitive
1938 * match. There may be one or more data nodes that match.
1939 * If we have an exact match, return that.
1940 * If we have an ambiguous match, return an error.
1941 * If we have only one inexact match, return that.
1943 slot = getSlot(op->scp->dirBplus, leafNode);
1944 if (slot <= BTERROR) {
1945 op->scp->dirDataVersion = 0;
1950 firstDataNode = getnode(leafNode, slot);
1952 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
1954 if (!comparekeys(op->scp->dirBplus)(key, getdatakey(dataNode), EXACT_MATCH) ) {
1958 nextDataNode = getdatanext(dataNode);
1962 cname = getdatavalue(dataNode).cname;
1964 } else if (count == 1) {
1965 cname = getdatavalue(firstDataNode).cname;
1966 rc = CM_ERROR_INEXACT_MATCH;
1968 rc = CM_ERROR_AMBIGUOUS_FILENAME;
1972 if (rc != CM_ERROR_AMBIGUOUS_FILENAME) {
1974 normchar_t * longNName = cm_NormalizeStringAlloc(cname, -1, NULL);
1976 key.name = longNName;
1977 delete(op->scp->dirBplus, key);
1978 key.name = normalizedEntry;
1983 delete(op->scp->dirBplus, key);
1988 QueryPerformanceCounter(&end);
1990 bplus_remove_time += (end.QuadPart - start.QuadPart);
1993 if (normalizedEntry)
1994 free(normalizedEntry);
2001 int cm_BPlusDirFoo(struct cm_scache *scp, struct cm_dirEntry *dep,
2002 void *dummy, osi_hyper_t *entryOffsetp)
2006 normchar_t *normalized_name=NULL;
2008 cm_SetFid(&data.fid, scp->fid.cell, scp->fid.volume,
2009 ntohl(dep->fid.vnode), ntohl(dep->fid.unique));
2013 normalized_name = cm_FsStringToNormStringAlloc(dep->name, -1, NULL);
2015 if (normalized_name) {
2016 key.name = normalized_name;
2024 data.cname = cm_FsStringToClientStringAlloc(dep->name, -1, NULL);
2025 if (data.cname == NULL) {
2031 data.fsname = cm_FsStrDup(dep->name);
2032 data.shortform = FALSE;
2034 /* the Write lock is held in cm_BPlusDirBuildTree() */
2035 insert(scp->dirBplus, key, data);
2037 if (cm_shortNames && !cm_Is8Dot3(data.cname)) {
2039 wchar_t wshortName[13];
2041 dfid.vnode = dep->fid.vnode;
2042 dfid.unique = dep->fid.unique;
2044 cm_Gen8Dot3NameIntW(data.cname, &dfid, wshortName, NULL);
2046 key.name = wshortName;
2047 data.cname = cm_FsStringToClientStringAlloc(dep->name, -1, NULL);
2049 data.fsname = cm_FsStrDup(dep->name);
2050 data.shortform = TRUE;
2052 insert(scp->dirBplus, key, data);
2056 if (normalized_name)
2057 free(normalized_name);
2060 findAllBtreeValues(scp->dirBplus);
2067 * scp->dirlock must be writeLocked before call
2069 * scp->mutex must not be held
2071 long cm_BPlusDirBuildTree(cm_scache_t *scp, cm_user_t *userp, cm_req_t* reqp)
2075 LARGE_INTEGER start, end;
2077 osi_assertx(scp->dirBplus == NULL, "cm_BPlusDirBuildTree called on non-empty tree");
2079 lock_AssertWrite(&scp->dirlock);
2081 QueryPerformanceCounter(&start);
2084 if (scp->dirBplus == NULL) {
2085 scp->dirBplus = initBtree(64, MAX_FANOUT, cm_BPlusCompareNormalizedKeys);
2087 if (scp->dirBplus == NULL) {
2091 thyper.HighPart = 0;
2092 rc = cm_ApplyDir(scp, cm_BPlusDirFoo, NULL, &thyper, userp, reqp, NULL);
2095 QueryPerformanceCounter(&end);
2097 bplus_build_time += (end.QuadPart - start.QuadPart);
2100 cm_BPlusDirEnumTest(scp, 1);
2105 int cm_MemDumpBPlusStats(FILE *outputFile, char *cookie, int lock)
2110 StringCbPrintfA(output, sizeof(output), "%s - B+ Lookup Hits: %-8d\r\n", cookie, bplus_lookup_hits);
2111 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2112 StringCbPrintfA(output, sizeof(output), "%s - Inexact Hits: %-8d\r\n", cookie, bplus_lookup_hits_inexact);
2113 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2114 StringCbPrintfA(output, sizeof(output), "%s - Ambiguous Hits: %-8d\r\n", cookie, bplus_lookup_ambiguous);
2115 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2116 StringCbPrintfA(output, sizeof(output), "%s - Misses: %-8d\r\n", cookie, bplus_lookup_misses);
2117 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2118 StringCbPrintfA(output, sizeof(output), "%s - Create: %-8d\r\n", cookie, bplus_create_entry);
2119 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2120 StringCbPrintfA(output, sizeof(output), "%s - Remove: %-8d\r\n", cookie, bplus_remove_entry);
2121 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2122 StringCbPrintfA(output, sizeof(output), "%s - Build Tree: %-8d\r\n", cookie, bplus_build_tree);
2123 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2124 StringCbPrintfA(output, sizeof(output), "%s - Free Tree: %-8d\r\n", cookie, bplus_free_tree);
2125 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2126 StringCbPrintfA(output, sizeof(output), "%s - DV Error: %-8d\r\n", cookie, bplus_dv_error);
2127 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2129 StringCbPrintfA(output, sizeof(output), "%s - B+ Time Lookup: %-16I64d\r\n", cookie, bplus_lookup_time);
2130 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2131 StringCbPrintfA(output, sizeof(output), "%s - Create: %-16I64d\r\n", cookie, bplus_create_time);
2132 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2133 StringCbPrintfA(output, sizeof(output), "%s - Remove: %-16I64d\r\n", cookie, bplus_remove_time);
2134 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2135 StringCbPrintfA(output, sizeof(output), "%s - Build: %-16I64d\r\n", cookie, bplus_build_time);
2136 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2137 StringCbPrintfA(output, sizeof(output), "%s - Free: %-16I64d\r\n", cookie, bplus_free_time);
2138 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
2143 void cm_BPlusDumpStats(void)
2145 afsi_log("B+ Lookup Hits: %-8d", bplus_lookup_hits);
2146 afsi_log(" Inexact Hits: %-8d", bplus_lookup_hits_inexact);
2147 afsi_log(" Ambiguous Hits: %-8d", bplus_lookup_ambiguous);
2148 afsi_log(" Misses: %-8d", bplus_lookup_misses);
2149 afsi_log(" Create: %-8d", bplus_create_entry);
2150 afsi_log(" Remove: %-8d", bplus_remove_entry);
2151 afsi_log(" Build Tree: %-8d", bplus_build_tree);
2152 afsi_log(" Free Tree: %-8d", bplus_free_tree);
2153 afsi_log(" DV Error: %-8d", bplus_dv_error);
2155 afsi_log("B+ Time Lookup: %-16I64d", bplus_lookup_time);
2156 afsi_log(" Create: %-16I64d", bplus_create_time);
2157 afsi_log(" Remove: %-16I64d", bplus_remove_time);
2158 afsi_log(" Build: %-16I64d", bplus_build_time);
2159 afsi_log(" Free: %-16I64d", bplus_free_time);
2162 static cm_direnum_t *
2163 cm_BPlusEnumAlloc(afs_uint32 entries)
2165 cm_direnum_t * enump;
2169 size = sizeof(cm_direnum_t);
2171 size = sizeof(cm_direnum_t)+(entries-1)*sizeof(cm_direnum_entry_t);
2172 enump = (cm_direnum_t *)malloc(size);
2174 memset(enump, 0, size);
2175 enump->count = entries;
2181 cm_BPlusDirEnumerate(cm_scache_t *dscp, cm_user_t *userp, cm_req_t *reqp,
2182 afs_uint32 locked, clientchar_t * maskp,
2183 afs_uint32 fetchStatus, cm_direnum_t **enumpp)
2185 afs_uint32 count = 0, slot, numentries;
2186 Nptr leafNode = NONODE, nextLeafNode;
2187 Nptr firstDataNode, dataNode, nextDataNode;
2188 cm_direnum_t * enump = NULL;
2192 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate start");
2194 /* Read lock the bplus tree so the data can't change */
2196 lock_ObtainRead(&dscp->dirlock);
2199 * Hold a reference to the directory so that it won't be
2200 * recycled while the enumeration is active.
2202 cm_HoldSCache(dscp);
2205 if (dscp->dirBplus == NULL) {
2206 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate No BPlus Tree");
2207 rc = CM_ERROR_WOULDBLOCK;
2211 /* Compute the number of entries */
2212 for (count = 0, leafNode = getleaf(dscp->dirBplus); leafNode; leafNode = nextLeafNode) {
2214 for ( slot = 1, numentries = numentries(leafNode); slot <= numentries; slot++) {
2215 firstDataNode = getnode(leafNode, slot);
2217 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
2219 /* There can be two data nodes for one file. One for
2220 the long name and one for the short name. We only
2221 include one of these for the enumeration */
2223 if (maskp == NULL) {
2224 if (!getdatavalue(dataNode).shortform)
2227 if (!getdatavalue(dataNode).shortform &&
2228 cm_MatchMask(getdatavalue(dataNode).cname, maskp, CM_FLAG_CASEFOLD))
2231 nextDataNode = getdatanext(dataNode);
2235 nextLeafNode = getnextnode(leafNode);
2238 StringCbPrintfA(buffer, sizeof(buffer), "BPlusTreeEnumerate count = %d", count);
2239 osi_Log1(afsd_logp, "BPlus: %s", osi_LogSaveString(afsd_logp, buffer));
2241 /* Allocate the enumeration object */
2242 enump = cm_BPlusEnumAlloc(count);
2243 if (enump == NULL) {
2244 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate Alloc failed");
2249 /* Copy the name and fid for each cname entry into the enumeration */
2250 for (count = 0, leafNode = getleaf(dscp->dirBplus); leafNode; leafNode = nextLeafNode) {
2252 for ( slot = 1, numentries = numentries(leafNode); slot <= numentries; slot++) {
2253 firstDataNode = getnode(leafNode, slot);
2255 for ( dataNode = firstDataNode; dataNode; dataNode = nextDataNode) {
2256 clientchar_t * name;
2259 if (maskp == NULL) {
2260 if (!getdatavalue(dataNode).shortform) {
2264 if (!getdatavalue(dataNode).shortform &&
2265 cm_MatchMask(getdatavalue(dataNode).cname, maskp, CM_FLAG_CASEFOLD)) {
2271 name = cm_ClientStrDup(getdatavalue(dataNode).cname);
2274 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate strdup failed");
2279 enump->entry[count].name = name;
2280 enump->entry[count].fid = getdatavalue(dataNode).fid;
2282 if (cm_shortNames) {
2283 if (!cm_Is8Dot3(name)) {
2286 dfid.vnode = htonl(getdatavalue(dataNode).fid.vnode);
2287 dfid.unique = htonl(getdatavalue(dataNode).fid.unique);
2289 cm_Gen8Dot3NameIntW(name, &dfid, enump->entry[count].shortName, NULL);
2291 StringCbCopyW(enump->entry[count].shortName,
2292 sizeof(enump->entry[count].shortName),
2299 nextDataNode = getdatanext(dataNode);
2303 nextLeafNode = getnextnode(leafNode);
2307 enump->userp = userp;
2308 enump->reqFlags = reqp->flags;
2309 enump->fetchStatus = fetchStatus;
2310 enump->dataVersion = dscp->dirDataVersion;
2314 lock_ReleaseRead(&dscp->dirlock);
2316 /* if we failed, cleanup any mess */
2318 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate rc != 0");
2321 * release the directory because we failed to generate an enumeration object.
2322 * adjust the directory position in the queue to ensure it doesn't get pushed
2323 * out by the allocation of a large number of cm_scache objects.
2325 lock_ObtainWrite(&cm_scacheLock);
2326 cm_AdjustScacheLRU(dscp);
2327 cm_ReleaseSCacheNoLock(dscp);
2328 lock_ReleaseWrite(&cm_scacheLock);
2329 cm_ReleaseUser(userp);
2331 for ( count = 0; count < enump->count && enump->entry[count].name; count++ ) {
2332 free(enump->entry[count].name);
2339 osi_Log0(afsd_logp, "cm_BPlusDirEnumerate end");
2345 cm_BPlusDirEnumBulkStat(cm_direnum_t *enump)
2347 cm_scache_t *dscp = enump->dscp;
2348 cm_user_t *userp = enump->userp;
2349 cm_bulkStat_t *bsp = NULL;
2350 afs_uint32 ** bs_errorCodep = NULL;
2351 afs_uint32 ** bs_flagsp = NULL;
2352 afs_uint32 dscp_errorCode = 0;
2353 afs_uint32 dscp_flags = 0;
2355 afs_uint32 code = 0;
2359 afs_int32 nobulkstat = 0;
2362 req.flags = enump->reqFlags;
2364 if ( dscp->fid.cell == AFS_FAKE_ROOT_CELL_ID )
2367 bsp = malloc(sizeof(cm_bulkStat_t));
2372 memset(bsp, 0, sizeof(cm_bulkStat_t));
2375 bs_errorCodep = malloc(sizeof(afs_uint32 *) * AFSCBMAX);
2376 if (!bs_errorCodep) {
2381 bs_flagsp = malloc(sizeof(afs_uint32 *) * AFSCBMAX);
2388 * In order to prevent the directory callback from expiring
2389 * on really large directories with many symlinks to mount
2390 * points such as /afs/andrew.cmu.edu/usr/, always include
2391 * the directory fid in the search.
2393 bsp->fids[0].Volume = dscp->fid.volume;
2394 bsp->fids[0].Vnode = dscp->fid.vnode;
2395 bsp->fids[0].Unique = dscp->fid.unique;
2396 bs_errorCodep[0] = &dscp_errorCode;
2397 bs_flagsp[0] = &dscp_flags;
2401 for ( count = 0; count < enump->count; count++ ) {
2402 if ( !wcscmp(L".", enump->entry[count].name) || !wcscmp(L"..", enump->entry[count].name) ) {
2406 tscp = cm_FindSCache(&enump->entry[count].fid);
2408 if (lock_TryWrite(&tscp->rw)) {
2409 /* we have an entry that we can look at */
2410 if (!cm_EAccesFindEntry(userp, &tscp->fid) && cm_HaveCallback(tscp)) {
2411 /* we have a callback on it. Don't bother
2412 * fetching this stat entry, since we're happy
2413 * with the info we have.
2415 lock_ReleaseWrite(&tscp->rw);
2416 cm_ReleaseSCache(tscp);
2417 enump->entry[count].flags |= CM_DIRENUM_FLAG_GOT_STATUS;
2418 enump->entry[count].errorCode = 0;
2423 code = cm_SyncOp(tscp, NULL, userp, &req, 0,
2424 CM_SCACHESYNC_NEEDCALLBACK | CM_SCACHESYNC_GETSTATUS);
2425 lock_ReleaseWrite(&tscp->rw);
2426 cm_ReleaseSCache(tscp);
2427 enump->entry[count].flags |= CM_DIRENUM_FLAG_GOT_STATUS;
2428 enump->entry[count].errorCode = code;
2432 lock_ReleaseWrite(&tscp->rw);
2434 cm_ReleaseSCache(tscp);
2438 bsp->fids[i].Volume = enump->entry[count].fid.volume;
2439 bsp->fids[i].Vnode = enump->entry[count].fid.vnode;
2440 bsp->fids[i].Unique = enump->entry[count].fid.unique;
2441 bs_errorCodep[i] = &enump->entry[count].errorCode;
2442 bs_flagsp[bsp->counter] = &enump->entry[i].flags;
2443 enump->entry[count].flags |= CM_DIRENUM_FLAG_GOT_STATUS;
2445 if (bsp->counter == AFSCBMAX) {
2446 code = cm_TryBulkStatRPC(dscp, bsp, userp, &req);
2447 if (code == CM_ERROR_BULKSTAT_FAILURE) {
2449 * If bulk stat cannot be used for this directory
2450 * we must perform individual fetch status calls.
2451 * Restart from the beginning of the enumeration.
2455 for (i=0; i<bsp->counter; i++) {
2456 *(bs_flagsp[i]) &= ~CM_DIRENUM_FLAG_GOT_STATUS;
2462 /* on any other error, exit */
2465 for ( i=0; i<bsp->counter; i++) {
2466 *(bs_errorCodep[i]) = cm_MapRPCError(bsp->stats[i].errorCode, &req);
2469 if (dscp_errorCode) {
2470 code = dscp_errorCode;
2473 memset(bsp, 0, sizeof(cm_bulkStat_t));
2477 * In order to prevent the directory callback from expiring
2478 * on really large directories with many symlinks to mount
2479 * points such as /afs/andrew.cmu.edu/usr/, always include
2480 * the directory fid in the search.
2482 bsp->fids[0].Volume = dscp->fid.volume;
2483 bsp->fids[0].Vnode = dscp->fid.vnode;
2484 bsp->fids[0].Unique = dscp->fid.unique;
2485 bs_errorCodep[0] = &dscp_errorCode;
2490 if (bsp->counter > 0) {
2491 code = cm_TryBulkStatRPC(dscp, bsp, userp, &req);
2492 if (code == CM_ERROR_BULKSTAT_FAILURE) {
2494 * If bulk stat cannot be used for this directory
2495 * we must perform individual fetch status calls.
2496 * Restart from the beginning of the enumeration.
2500 for (i=0; i<bsp->counter; i++) {
2501 *(bs_flagsp[i]) &= ~CM_DIRENUM_FLAG_GOT_STATUS;
2509 for ( i=0; i<bsp->counter; i++) {
2510 *(bs_errorCodep[i]) = cm_MapRPCError(bsp->stats[i].errorCode, &req);
2513 if (dscp_errorCode) {
2514 code = dscp_errorCode;
2523 free(bs_errorCodep);
2531 * Similar to cm_BPlusDirEnumBulkStat() except that only
2532 * one RPC is issued containing the provided scp FID and up to
2533 * AFSCBMAX - 2 other FIDs for which the status info has yet
2537 cm_BPlusDirEnumBulkStatOne(cm_direnum_t *enump, cm_scache_t *scp)
2539 cm_scache_t *dscp = enump->dscp;
2540 cm_user_t *userp = enump->userp;
2541 cm_bulkStat_t *bsp = NULL;
2542 afs_uint32 ** bs_errorCodep = NULL;
2543 afs_uint32 ** bs_flagsp = NULL;
2544 afs_uint32 dscp_errorCode = 0;
2545 afs_uint32 dscp_flags = 0;
2546 afs_uint32 scp_errorCode = 0;
2547 afs_uint32 scp_flags = 0;
2548 afs_uint32 code = 0;
2553 if ( dscp->fid.cell == AFS_FAKE_ROOT_CELL_ID )
2557 req.flags = enump->reqFlags;
2559 bsp = malloc(sizeof(cm_bulkStat_t));
2564 memset(bsp, 0, sizeof(cm_bulkStat_t));
2567 bs_errorCodep = malloc(sizeof(afs_uint32 *) * AFSCBMAX);
2568 if (!bs_errorCodep) {
2573 bs_flagsp = malloc(sizeof(afs_uint32 *) * AFSCBMAX);
2580 * In order to prevent the directory callback from expiring
2581 * on really large directories with many symlinks to mount
2582 * points such as /afs/andrew.cmu.edu/usr/, always include
2583 * the directory fid in the search.
2585 bsp->fids[0].Volume = dscp->fid.volume;
2586 bsp->fids[0].Vnode = dscp->fid.vnode;
2587 bsp->fids[0].Unique = dscp->fid.unique;
2588 bs_errorCodep[0] = &dscp_errorCode;
2589 bs_flagsp[0] = &dscp_flags;
2593 * There is an assumption that this FID is located
2594 * within the directory enumeration but it could be
2595 * the case that the enumeration is out of date and
2596 * the FID is not listed. So we explicitly add it
2597 * after the directory FID and then skip it later
2600 bsp->fids[1].Volume = scp->fid.volume;
2601 bsp->fids[1].Vnode = scp->fid.vnode;
2602 bsp->fids[1].Unique = scp->fid.unique;
2603 bs_errorCodep[1] = &scp_errorCode;
2604 bs_flagsp[1] = &scp_flags;
2607 if (enump->count <= AFSCBMAX - 1) {
2611 * Find the requested FID in the enumeration and start from there.
2613 for (i=0; i < enump->count && cm_FidCmp(&scp->fid, &enump->entry[i].fid); i++);
2616 for ( ; bsp->counter < AFSCBMAX && i < enump->count; i++) {
2617 if ( !wcscmp(L".", enump->entry[i].name) || !wcscmp(L"..", enump->entry[i].name) ) {
2621 tscp = cm_FindSCache(&enump->entry[i].fid);
2624 cm_ReleaseSCache(tscp);
2628 if (lock_TryWrite(&tscp->rw)) {
2629 /* we have an entry that we can look at */
2630 if (!cm_EAccesFindEntry(userp, &tscp->fid) && cm_HaveCallback(tscp)) {
2631 /* we have a callback on it. Don't bother
2632 * fetching this stat entry, since we're happy
2633 * with the info we have.
2635 lock_ReleaseWrite(&tscp->rw);
2636 cm_ReleaseSCache(tscp);
2637 enump->entry[i].flags |= CM_DIRENUM_FLAG_GOT_STATUS;
2638 enump->entry[i].errorCode = 0;
2641 lock_ReleaseWrite(&tscp->rw);
2643 cm_ReleaseSCache(tscp);
2646 bsp->fids[bsp->counter].Volume = enump->entry[i].fid.volume;
2647 bsp->fids[bsp->counter].Vnode = enump->entry[i].fid.vnode;
2648 bsp->fids[bsp->counter].Unique = enump->entry[i].fid.unique;
2649 bs_errorCodep[bsp->counter] = &enump->entry[i].errorCode;
2650 bs_flagsp[bsp->counter] = &enump->entry[i].flags;
2651 enump->entry[i].flags |= CM_DIRENUM_FLAG_GOT_STATUS;
2655 if (bsp->counter > 0) {
2656 code = cm_TryBulkStatRPC(dscp, bsp, userp, &req);
2657 /* Now process any errors that might have occurred */
2658 if (code == CM_ERROR_BULKSTAT_FAILURE) {
2659 for (i=2; i<bsp->counter; i++) {
2660 *(bs_flagsp[i]) &= ~CM_DIRENUM_FLAG_GOT_STATUS;
2663 lock_ObtainWrite(&scp->rw);
2664 code = cm_SyncOp(scp, NULL, userp, &req, 0,
2665 CM_SCACHESYNC_NEEDCALLBACK | CM_SCACHESYNC_GETSTATUS);
2666 lock_ReleaseWrite(&scp->rw);
2673 for ( i=0; i<bsp->counter; i++) {
2674 *(bs_errorCodep[i]) = cm_MapRPCError(bsp->stats[i].errorCode, &req);
2677 /* Check if there was an error on the requested FID, if so return it */
2678 if ( scp_errorCode ) {
2679 code = scp_errorCode;
2688 free(bs_errorCodep);
2696 cm_BPlusDirEnumBulkStatNext(cm_direnum_t *enump)
2698 cm_scache_t *dscp = enump->dscp;
2699 cm_user_t *userp = enump->userp;
2700 cm_bulkStat_t *bsp = NULL;
2701 afs_uint32 ** bs_errorCodep = NULL;
2702 afs_uint32 ** bs_flagsp = NULL;
2703 afs_uint32 dscp_errorCode = 0;
2704 afs_uint32 dscp_flags = 0;
2706 afs_uint32 code = 0;
2709 afs_int32 next = -1;
2712 if ( dscp->fid.cell == AFS_FAKE_ROOT_CELL_ID )
2716 req.flags = enump->reqFlags;
2718 bsp = malloc(sizeof(cm_bulkStat_t));
2723 memset(bsp, 0, sizeof(cm_bulkStat_t));
2726 bs_errorCodep = malloc(sizeof(afs_uint32 *) * AFSCBMAX);
2727 if (!bs_errorCodep) {
2732 bs_flagsp = malloc(sizeof(afs_uint32 *) * AFSCBMAX);
2739 * In order to prevent the directory callback from expiring
2740 * on really large directories with many symlinks to mount
2741 * points such as /afs/andrew.cmu.edu/usr/, always include
2742 * the directory fid in the search.
2744 bsp->fids[0].Volume = dscp->fid.volume;
2745 bsp->fids[0].Vnode = dscp->fid.vnode;
2746 bsp->fids[0].Unique = dscp->fid.unique;
2747 bs_errorCodep[0] = &dscp_errorCode;
2748 bs_flagsp[0] = &dscp_flags;
2751 for ( count = enump->next; count < enump->count && bsp->counter < AFSCBMAX; count++ ) {
2752 if ( !wcscmp(L".", enump->entry[count].name) || !wcscmp(L"..", enump->entry[count].name) ) {
2758 tscp = cm_FindSCache(&enump->entry[count].fid);
2760 if (lock_TryWrite(&tscp->rw)) {
2761 /* we have an entry that we can look at */
2762 if (!cm_EAccesFindEntry(userp, &tscp->fid) && cm_HaveCallback(tscp)) {
2763 /* we have a callback on it. Don't bother
2764 * fetching this stat entry, since we're happy
2765 * with the info we have.
2767 lock_ReleaseWrite(&tscp->rw);
2768 cm_ReleaseSCache(tscp);
2769 enump->entry[count].flags |= CM_DIRENUM_FLAG_GOT_STATUS;
2770 enump->entry[count].errorCode = 0;
2773 lock_ReleaseWrite(&tscp->rw);
2775 cm_ReleaseSCache(tscp);
2778 bsp->fids[bsp->counter].Volume = enump->entry[count].fid.volume;
2779 bsp->fids[bsp->counter].Vnode = enump->entry[count].fid.vnode;
2780 bsp->fids[bsp->counter].Unique = enump->entry[count].fid.unique;
2781 bs_errorCodep[bsp->counter] = &enump->entry[count].errorCode;
2782 bs_flagsp[bsp->counter] = &enump->entry[count].flags;
2783 enump->entry[count].flags |= CM_DIRENUM_FLAG_GOT_STATUS;
2787 if (bsp->counter > 0) {
2788 code = cm_TryBulkStatRPC(dscp, bsp, userp, &req);
2789 /* Now process any errors that might have occurred */
2790 if (code == CM_ERROR_BULKSTAT_FAILURE) {
2791 for (i=0; i<bsp->counter; i++) {
2792 *(bs_flagsp[i]) &= ~CM_DIRENUM_FLAG_GOT_STATUS;
2795 code = cm_GetSCache(&enump->entry[next].fid, NULL, &tscp, userp, &req);
2797 if (lock_TryWrite(&tscp->rw)) {
2798 code = cm_SyncOp(tscp, NULL, userp, &req, 0,
2799 CM_SCACHESYNC_NEEDCALLBACK | CM_SCACHESYNC_GETSTATUS);
2800 lock_ReleaseWrite(&tscp->rw);
2801 *(bs_errorCodep[1]) = code;
2802 *(bs_flagsp[1]) |= CM_DIRENUM_FLAG_GOT_STATUS;
2804 cm_ReleaseSCache(tscp);
2806 *(bs_errorCodep[1]) = code;
2807 *(bs_flagsp[1]) |= CM_DIRENUM_FLAG_GOT_STATUS;
2815 for ( i=0; i<bsp->counter; i++) {
2816 *(bs_errorCodep[i]) = cm_MapRPCError(bsp->stats[i].errorCode, &req);
2819 if (dscp_errorCode) {
2820 code = dscp_errorCode;
2829 free(bs_errorCodep);
2837 cm_BPlusDirNextEnumEntry(cm_direnum_t *enump, cm_direnum_entry_t **entrypp)
2841 if (enump == NULL || entrypp == NULL || enump->next >= enump->count) {
2844 osi_Log0(afsd_logp, "cm_BPlusDirNextEnumEntry invalid input");
2845 return CM_ERROR_INVAL;
2848 if (enump->fetchStatus &&
2849 !(enump->entry[enump->next].flags & CM_DIRENUM_FLAG_GOT_STATUS)) {
2850 code = cm_BPlusDirEnumBulkStatNext(enump);
2853 *entrypp = &enump->entry[enump->next++];
2854 if ( enump->next == enump->count ) {
2855 osi_Log0(afsd_logp, "cm_BPlusDirNextEnumEntry STOPNOW");
2856 return CM_ERROR_STOPNOW;
2860 (*entrypp)->errorCode = code;
2861 osi_Log1(afsd_logp, "cm_BPlusDirNextEnumEntry ERROR 0x%x", code);
2863 osi_Log0(afsd_logp, "cm_BPlusDirNextEnumEntry SUCCESS");
2870 cm_BPlusDirPeekNextEnumEntry(cm_direnum_t *enump, cm_direnum_entry_t **entrypp)
2874 if (enump == NULL || entrypp == NULL || enump->next >= enump->count) {
2877 osi_Log0(afsd_logp, "cm_BPlusDirPeekNextEnumEntry invalid input");
2878 return CM_ERROR_INVAL;
2881 if (enump->fetchStatus &&
2882 !(enump->entry[enump->next].flags & CM_DIRENUM_FLAG_GOT_STATUS)) {
2883 code = cm_BPlusDirEnumBulkStatNext(enump);
2888 *entrypp = &enump->entry[enump->next];
2889 if ( enump->next == enump->count ) {
2890 osi_Log0(afsd_logp, "cm_BPlusDirPeekNextEnumEntry STOPNOW");
2891 return CM_ERROR_STOPNOW;
2894 osi_Log0(afsd_logp, "cm_BPlusDirPeekNextEnumEntry SUCCESS");
2900 cm_BPlusDirFreeEnumeration(cm_direnum_t *enump)
2904 osi_Log0(afsd_logp, "cm_BPlusDirFreeEnumeration");
2908 * Release the directory object but first adjust its position
2909 * in the LRU queue to ensure that it does not get stuck at the
2910 * end due to the allocation of a large number of cm_scache
2911 * entries in the directory.
2913 lock_ObtainWrite(&cm_scacheLock);
2914 cm_AdjustScacheLRU(enump->dscp);
2915 cm_ReleaseSCacheNoLock(enump->dscp);
2916 lock_ReleaseWrite(&cm_scacheLock);
2917 cm_ReleaseUser(enump->userp);
2919 for ( count = 0; count < enump->count && enump->entry[count].name; count++ ) {
2920 free(enump->entry[count].name);
2928 cm_BPlusDirEnumTest(cm_scache_t * dscp, cm_user_t *userp, cm_req_t *reqp, afs_uint32 locked)
2930 cm_direnum_t * enump = NULL;
2931 cm_direnum_entry_t * entryp;
2935 osi_Log0(afsd_logp, "cm_BPlusDirEnumTest start");
2937 for (code = cm_BPlusDirEnumerate(dscp, userp, reqp, locked, NULL, 1, &enump); code == 0; ) {
2938 code = cm_BPlusDirNextEnumEntry(enump, &entryp);
2939 if (code == 0 || code == CM_ERROR_STOPNOW) {
2942 char * type = "ScpNotFound";
2945 scp = cm_FindSCache(&entryp->fid);
2947 switch (scp->fileType) {
2948 case CM_SCACHETYPE_FILE :
2951 case CM_SCACHETYPE_DIRECTORY :
2954 case CM_SCACHETYPE_SYMLINK :
2957 case CM_SCACHETYPE_MOUNTPOINT:
2958 type = "MountPoint";
2960 case CM_SCACHETYPE_DFSLINK :
2963 case CM_SCACHETYPE_INVALID :
2971 dv = scp->dataVersion;
2972 cm_ReleaseSCache(scp);
2975 StringCbPrintfA(buffer, sizeof(buffer), "'%S' Fid = (%d,%d,%d,%d) Short = '%S' Type %s DV %I64d",
2977 entryp->fid.cell, entryp->fid.volume, entryp->fid.vnode, entryp->fid.unique,
2982 osi_Log0(afsd_logp, osi_LogSaveString(afsd_logp, buffer));
2987 cm_BPlusDirFreeEnumeration(enump);
2989 osi_Log0(afsd_logp, "cm_BPlusDirEnumTest end");
2993 #endif /* USE_BPLUS */