001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.kahadb.index;
018
019 import java.io.DataInput;
020 import java.io.DataOutput;
021 import java.io.IOException;
022 import java.io.PrintWriter;
023 import java.util.Arrays;
024 import java.util.Iterator;
025 import java.util.Map;
026 import java.util.NoSuchElementException;
027 import java.util.Map.Entry;
028
029 import org.apache.kahadb.index.BTreeIndex.Prefixer;
030 import org.apache.kahadb.page.Page;
031 import org.apache.kahadb.page.Transaction;
032 import org.apache.kahadb.util.VariableMarshaller;
033
034
035 /**
036 * The BTreeNode class represents a node in the BTree object graph. It is stored in
037 * one Page of a PageFile.
038 */
039 public final class BTreeNode<Key,Value> {
040
041 // The index that this node is part of.
042 private final BTreeIndex<Key,Value> index;
043 // The parent node or null if this is the root node of the BTree
044 private BTreeNode<Key,Value> parent;
045 // The page associated with this node
046 private Page<BTreeNode<Key,Value>> page;
047
048 // Order list of keys in the node
049 private Key[] keys;
050 // Values associated with the Keys. Null if this is a branch node.
051 private Value[] values;
052 // nodeId pointers to children BTreeNodes. Null if this is a leaf node.
053 private long[] children;
054 // The next leaf node after this one. Used for fast iteration of the entries.
055 private long next = -1;
056
057 private final class KeyValueEntry implements Map.Entry<Key, Value> {
058 private final Key key;
059 private final Value value;
060
061 public KeyValueEntry(Key key, Value value) {
062 this.key = key;
063 this.value = value;
064 }
065
066 public Key getKey() {
067 return key;
068 }
069
070 public Value getValue() {
071 return value;
072 }
073
074 public Value setValue(Value value) {
075 throw new UnsupportedOperationException();
076 }
077
078 }
079
080 private final class BTreeIterator implements Iterator<Map.Entry<Key, Value>> {
081
082 private final Transaction tx;
083 BTreeNode<Key,Value> current;
084 int nextIndex;
085 Map.Entry<Key,Value> nextEntry;
086
087 private BTreeIterator(Transaction tx, BTreeNode<Key,Value> current, int nextIndex) {
088 this.tx = tx;
089 this.current = current;
090 this.nextIndex=nextIndex;
091 }
092
093 synchronized private void findNextPage() {
094 if( nextEntry!=null ) {
095 return;
096 }
097
098 try {
099 while( current!=null ) {
100 if( nextIndex >= current.keys.length ) {
101 // we need to roll to the next leaf..
102 if( current.next >= 0 ) {
103 current = index.loadNode(tx, current.next, null);
104 nextIndex=0;
105 } else {
106 break;
107 }
108 } else {
109 nextEntry = new KeyValueEntry(current.keys[nextIndex], current.values[nextIndex]);
110 nextIndex++;
111 break;
112 }
113
114 }
115 } catch (IOException e) {
116 }
117 }
118
119 public boolean hasNext() {
120 findNextPage();
121 return nextEntry !=null;
122 }
123
124 public Entry<Key, Value> next() {
125 findNextPage();
126 if( nextEntry !=null ) {
127 Entry<Key, Value> lastEntry = nextEntry;
128 nextEntry=null;
129 return lastEntry;
130 } else {
131 throw new NoSuchElementException();
132 }
133 }
134
135 public void remove() {
136 throw new UnsupportedOperationException();
137 }
138 }
139
140 /**
141 * The Marshaller is used to store and load the data in the BTreeNode into a Page.
142 *
143 * @param <Key>
144 * @param <Value>
145 */
146 static public class Marshaller<Key,Value> extends VariableMarshaller<BTreeNode<Key,Value>> {
147 private final BTreeIndex<Key,Value> index;
148
149 public Marshaller(BTreeIndex<Key,Value> index) {
150 this.index = index;
151 }
152
153 public void writePayload(BTreeNode<Key,Value> node, DataOutput os) throws IOException {
154 // Write the keys
155 short count = (short)node.keys.length; // cast may truncate value...
156 if( count != node.keys.length ) {
157 throw new IOException("Too many keys");
158 }
159
160 os.writeShort(count);
161 for (int i = 0; i < node.keys.length; i++) {
162 index.getKeyMarshaller().writePayload(node.keys[i], os);
163 }
164
165 if( node.isBranch() ) {
166 // If this is a branch...
167 os.writeBoolean(true);
168 for (int i = 0; i < count+1; i++) {
169 os.writeLong(node.children[i]);
170 }
171
172 } else {
173 // If this is a leaf
174 os.writeBoolean(false);
175 for (int i = 0; i < count; i++) {
176 index.getValueMarshaller().writePayload(node.values[i], os);
177 }
178 os.writeLong(node.next);
179 }
180 }
181
182 @SuppressWarnings("unchecked")
183 public BTreeNode<Key,Value> readPayload(DataInput is) throws IOException {
184 BTreeNode<Key,Value> node = new BTreeNode<Key,Value>(index);
185 int count = is.readShort();
186
187 node.keys = (Key[])new Object[count];
188 for (int i = 0; i < count; i++) {
189 node.keys[i] = index.getKeyMarshaller().readPayload(is);
190 }
191
192 if( is.readBoolean() ) {
193 node.children = new long[count+1];
194 for (int i = 0; i < count+1; i++) {
195 node.children[i] = is.readLong();
196 }
197 } else {
198 node.values = (Value[])new Object[count];
199 for (int i = 0; i < count; i++) {
200 node.values[i] = index.getValueMarshaller().readPayload(is);
201 }
202 node.next = is.readLong();
203 }
204 return node;
205 }
206 }
207
208 public BTreeNode(BTreeIndex<Key,Value> index) {
209 this.index = index;
210 }
211
212 public void setEmpty() {
213 setLeafData(createKeyArray(0), createValueArray(0));
214 }
215
216
217 /**
218 * Internal (to the BTreeNode) method. Because this method is called only by
219 * BTreeNode itself, no synchronization done inside of this method.
220 * @throws IOException
221 */
222 private BTreeNode<Key,Value> getChild(Transaction tx, int idx) throws IOException {
223 if (isBranch() && idx >= 0 && idx < children.length) {
224 BTreeNode<Key, Value> result = this.index.loadNode(tx, children[idx], this);
225 return result;
226 } else {
227 return null;
228 }
229 }
230
231 public Value remove(Transaction tx, Key key) throws IOException {
232
233 if(isBranch()) {
234 int idx = Arrays.binarySearch(keys, key);
235 idx = idx < 0 ? -(idx + 1) : idx + 1;
236 BTreeNode<Key, Value> child = getChild(tx, idx);
237 if( child.getPageId() == index.getPageId() ) {
238 throw new IOException("BTree corrupted: Cylce detected.");
239 }
240 Value rc = child.remove(tx, key);
241
242 // child node is now empty.. remove it from the branch node.
243 if( child.keys.length == 0 ) {
244
245 // If the child node is a branch, promote
246 if( child.isBranch() ) {
247 // This is cause branches are never really empty.. they just go down to 1 child..
248 children[idx] = child.children[0];
249 } else {
250
251 // The child was a leaf. Then we need to actually remove it from this branch node..
252
253 // We need to update the previous child's next pointer to skip over the child being removed....
254 if( idx > 0 && children.length > 1) {
255 BTreeNode<Key, Value> previousChild = getChild(tx, idx-1);
256 previousChild.next = child.next;
257 index.storeNode(tx, previousChild, true);
258 }
259
260 if( idx < children.length-1 ) {
261 // Delete it and key to the right.
262 setBranchData(arrayDelete(keys, idx), arrayDelete(children, idx));
263 } else {
264 // It was the last child.. Then delete it and key to the left
265 setBranchData(arrayDelete(keys, idx-1), arrayDelete(children, idx));
266 }
267
268 // If we are the root node, and only have 1 child left. Then
269 // make the root be the leaf node.
270 if( children.length == 1 && parent==null ) {
271 child = getChild(tx, 0);
272 keys = child.keys;
273 children = child.children;
274 values = child.values;
275 // free up the page..
276 tx.free(child.getPage());
277 }
278
279 }
280 index.storeNode(tx, this, true);
281 }
282
283 return rc;
284 } else {
285 int idx = Arrays.binarySearch(keys, key);
286 if (idx < 0) {
287 return null;
288 } else {
289 Value oldValue = values[idx];
290 setLeafData(arrayDelete(keys, idx), arrayDelete(values, idx));
291
292 if( keys.length==0 && parent!=null) {
293 tx.free(getPage());
294 } else {
295 index.storeNode(tx, this, true);
296 }
297
298 return oldValue;
299 }
300 }
301 }
302
303 public Value put(Transaction tx, Key key, Value value) throws IOException {
304 if (key == null) {
305 throw new IllegalArgumentException("Key cannot be null");
306 }
307
308 if( isBranch() ) {
309 return getLeafNode(tx, this, key).put(tx, key, value);
310 } else {
311 int idx = Arrays.binarySearch(keys, key);
312
313 Value oldValue=null;
314 if (idx >= 0) {
315 // Key was found... Overwrite
316 oldValue = values[idx];
317 values[idx] = value;
318 setLeafData(keys, values);
319 } else {
320 // Key was not found, Insert it
321 idx = -(idx + 1);
322 setLeafData(arrayInsert(keys, key, idx), arrayInsert(values, value, idx));
323 }
324
325 try {
326 index.storeNode(tx, this, allowOverflow());
327 } catch ( Transaction.PageOverflowIOException e ) {
328 // If we get an overflow
329 split(tx);
330 }
331
332 return oldValue;
333 }
334 }
335
336 private void promoteValue(Transaction tx, Key key, long nodeId) throws IOException {
337
338 int idx = Arrays.binarySearch(keys, key);
339 idx = idx < 0 ? -(idx + 1) : idx + 1;
340 setBranchData(arrayInsert(keys, key, idx), arrayInsert(children, nodeId, idx + 1));
341
342 try {
343 index.storeNode(tx, this, allowOverflow());
344 } catch ( Transaction.PageOverflowIOException e ) {
345 split(tx);
346 }
347
348 }
349
350 /**
351 * Internal to the BTreeNode method
352 */
353 private void split(Transaction tx) throws IOException {
354 Key[] leftKeys;
355 Key[] rightKeys;
356 Value[] leftValues=null;
357 Value[] rightValues=null;
358 long[] leftChildren=null;
359 long[] rightChildren=null;
360 Key separator;
361
362 int vc = keys.length;
363 int pivot = vc / 2;
364
365 // Split the node into two nodes
366 if( isBranch() ) {
367
368 leftKeys = createKeyArray(pivot);
369 leftChildren = new long[leftKeys.length + 1];
370 rightKeys = createKeyArray(vc - (pivot + 1));
371 rightChildren = new long[rightKeys.length + 1];
372
373 System.arraycopy(keys, 0, leftKeys, 0, leftKeys.length);
374 System.arraycopy(children, 0, leftChildren, 0, leftChildren.length);
375 System.arraycopy(keys, leftKeys.length + 1, rightKeys, 0, rightKeys.length);
376 System.arraycopy(children, leftChildren.length, rightChildren, 0, rightChildren.length);
377
378 // Is it a Simple Prefix BTree??
379 Prefixer<Key> prefixer = index.getPrefixer();
380 if(prefixer!=null) {
381 separator = prefixer.getSimplePrefix(leftKeys[leftKeys.length - 1], rightKeys[0]);
382 } else {
383 separator = keys[leftKeys.length];
384 }
385
386
387 } else {
388
389 leftKeys = createKeyArray(pivot);
390 leftValues = createValueArray(leftKeys.length);
391 rightKeys = createKeyArray(vc - pivot);
392 rightValues = createValueArray(rightKeys.length);
393
394 System.arraycopy(keys, 0, leftKeys, 0, leftKeys.length);
395 System.arraycopy(values, 0, leftValues, 0, leftValues.length);
396 System.arraycopy(keys, leftKeys.length, rightKeys, 0, rightKeys.length);
397 System.arraycopy(values, leftValues.length, rightValues, 0, rightValues.length);
398
399 // separator = getSeparator(leftVals[leftVals.length - 1],
400 // rightVals[0]);
401 separator = rightKeys[0];
402
403 }
404
405 // Promote the pivot to the parent branch
406 if (parent == null) {
407
408 // This can only happen if this is the root
409 BTreeNode<Key,Value> rNode = this.index.createNode(tx, this);
410 BTreeNode<Key,Value> lNode = this.index.createNode(tx, this);
411
412 if( isBranch() ) {
413 rNode.setBranchData(rightKeys, rightChildren);
414 lNode.setBranchData(leftKeys, leftChildren);
415 } else {
416 rNode.setLeafData(rightKeys, rightValues);
417 lNode.setLeafData(leftKeys, leftValues);
418 lNode.setNext(rNode.getPageId());
419 }
420
421 Key[] v = createKeyArray(1);
422 v[0]=separator;
423 setBranchData(v, new long[] { lNode.getPageId(), rNode.getPageId() });
424
425 index.storeNode(tx, this, true);
426 index.storeNode(tx, rNode, true);
427 index.storeNode(tx, lNode, true);
428
429 } else {
430 BTreeNode<Key,Value> rNode = this.index.createNode(tx, parent);
431
432 if( isBranch() ) {
433 setBranchData(leftKeys, leftChildren);
434 rNode.setBranchData(rightKeys, rightChildren);
435 } else {
436 rNode.setNext(next);
437 next = rNode.getPageId();
438 setLeafData(leftKeys, leftValues);
439 rNode.setLeafData(rightKeys, rightValues);
440 }
441
442 index.storeNode(tx, this, true);
443 index.storeNode(tx, rNode, true);
444 parent.promoteValue(tx, separator, rNode.getPageId());
445 }
446 }
447
448 public void printStructure(Transaction tx, PrintWriter out, String prefix) throws IOException {
449 if( prefix.length()>0 && parent == null ) {
450 throw new IllegalStateException("Cycle back to root node detected.");
451 }
452
453 if( isBranch() ) {
454 for(int i=0 ; i < children.length; i++) {
455 BTreeNode<Key, Value> child = getChild(tx, i);
456 if( i == children.length-1) {
457 out.println(prefix+"\\- "+child.getPageId()+(child.isBranch()?" ("+child.children.length+")":""));
458 child.printStructure(tx, out, prefix+" ");
459 } else {
460 out.println(prefix+"|- "+child.getPageId()+(child.isBranch()?" ("+child.children.length+")":"")+" : "+keys[i]);
461 child.printStructure(tx, out, prefix+" ");
462 }
463 }
464 }
465 }
466
467
468 public int getMinLeafDepth(Transaction tx, int depth) throws IOException {
469 depth++;
470 if( isBranch() ) {
471 int min = Integer.MAX_VALUE;
472 for(int i=0 ; i < children.length; i++) {
473 min = Math.min(min, getChild(tx, i).getMinLeafDepth(tx, depth));
474 }
475 return min;
476 } else {
477 // print(depth*2, "- "+page.getPageId());
478 return depth;
479 }
480 }
481
482 public int getMaxLeafDepth(Transaction tx, int depth) throws IOException {
483 depth++;
484 if( isBranch() ) {
485 int v = 0;
486 for(int i=0 ; i < children.length; i++) {
487 v = Math.max(v, getChild(tx, i).getMaxLeafDepth(tx, depth));
488 }
489 depth = v;
490 }
491 return depth;
492 }
493
494 public Value get(Transaction tx, Key key) throws IOException {
495 if (key == null) {
496 throw new IllegalArgumentException("Key cannot be null");
497 }
498 if( isBranch() ) {
499 return getLeafNode(tx, this, key).get(tx, key);
500 } else {
501 int idx = Arrays.binarySearch(keys, key);
502 if (idx < 0) {
503 return null;
504 } else {
505 return values[idx];
506 }
507 }
508 }
509
510 public boolean isEmpty(final Transaction tx) throws IOException {
511 return keys.length==0;
512 }
513
514 public void visit(Transaction tx, BTreeVisitor<Key, Value> visitor) throws IOException {
515 if (visitor == null) {
516 throw new IllegalArgumentException("Visitor cannot be null");
517 }
518 if( isBranch() ) {
519 for(int i=0; i < this.children.length; i++) {
520 Key key1 = null;
521 if( i!=0 ) {
522 key1 = keys[i-1];
523 }
524 Key key2 = null;
525 if( i!=this.children.length-1 ) {
526 key2 = keys[i];
527 }
528 if( visitor.isInterestedInKeysBetween(key1, key2) ) {
529 BTreeNode<Key, Value> child = getChild(tx, i);
530 child.visit(tx, visitor);
531 }
532 }
533 } else {
534 visitor.visit(Arrays.asList(keys), Arrays.asList(values));
535 }
536 }
537
538 public Map.Entry<Key,Value> getFirst(Transaction tx) throws IOException {
539 BTreeNode<Key, Value> node = this;
540 while( node .isBranch() ) {
541 node = node.getChild(tx, 0);
542 }
543 if( node.values.length>0 ) {
544 return new KeyValueEntry(node.keys[0], node.values[0]);
545 } else {
546 return null;
547 }
548 }
549
550 public Map.Entry<Key,Value> getLast(Transaction tx) throws IOException {
551 BTreeNode<Key, Value> node = this;
552 while( node.isBranch() ) {
553 node = node.getChild(tx, node.children.length-1);
554 }
555 if( node.values.length>0 ) {
556 int idx = node.values.length-1;
557 return new KeyValueEntry(node.keys[idx], node.values[idx]);
558 } else {
559 return null;
560 }
561 }
562
563 public BTreeNode<Key,Value> getFirstLeafNode(Transaction tx) throws IOException {
564 BTreeNode<Key, Value> node = this;
565 while( node .isBranch() ) {
566 node = node.getChild(tx, 0);
567 }
568 return node;
569 }
570
571 public Iterator<Map.Entry<Key,Value>> iterator(final Transaction tx, Key startKey) throws IOException {
572 if (startKey == null) {
573 return iterator(tx);
574 }
575 if( isBranch() ) {
576 return getLeafNode(tx, this, startKey).iterator(tx, startKey);
577 } else {
578 int idx = Arrays.binarySearch(keys, startKey);
579 if (idx < 0) {
580 idx = -(idx + 1);
581 }
582 return new BTreeIterator(tx, this, idx);
583 }
584 }
585
586 public Iterator<Map.Entry<Key,Value>> iterator(final Transaction tx) throws IOException {
587 return new BTreeIterator(tx, getFirstLeafNode(tx), 0);
588 }
589
590 public void clear(Transaction tx) throws IOException {
591 if( isBranch() ) {
592 for (int i = 0; i < children.length; i++) {
593 BTreeNode<Key, Value> node = index.loadNode(tx, children[i], this);
594 node.clear(tx);
595 tx.free(node.getPage());
596 }
597 }
598 // Reset the root node to be a leaf.
599 if( parent == null ) {
600 setLeafData(createKeyArray(0), createValueArray(0));
601 next=-1;
602 index.storeNode(tx, this, true);
603 }
604 }
605
606
607 private static <Key,Value> BTreeNode<Key, Value> getLeafNode(Transaction tx, final BTreeNode<Key, Value> node, Key key) throws IOException {
608 BTreeNode<Key, Value> current = node;
609 while( true ) {
610 if( current.isBranch() ) {
611 int idx = Arrays.binarySearch(current.keys, key);
612 idx = idx < 0 ? -(idx + 1) : idx + 1;
613 BTreeNode<Key, Value> child = current.getChild(tx, idx);
614
615 // A little cycle detection for sanity's sake
616 if( child == node ) {
617 throw new IOException("BTree corrupted: Cylce detected.");
618 }
619
620 current = child;
621 } else {
622 break;
623 }
624 }
625 return current;
626 }
627
628 public boolean contains(Transaction tx, Key key) throws IOException {
629 if (key == null) {
630 throw new IllegalArgumentException("Key cannot be null");
631 }
632
633 if( isBranch() ) {
634 return getLeafNode(tx, this, key).contains(tx, key);
635 } else {
636 int idx = Arrays.binarySearch(keys, key);
637 if (idx < 0) {
638 return false;
639 } else {
640 return true;
641 }
642 }
643 }
644
645 ///////////////////////////////////////////////////////////////////
646 // Implementation methods
647 ///////////////////////////////////////////////////////////////////
648
649
650 private boolean allowOverflow() {
651 // Only allow page overflow if there are <= 3 keys in the node. Otherwise a split will occur on overflow
652 return this.keys.length<=3;
653 }
654
655
656 private void setLeafData(Key[] keys, Value[] values) {
657 this.keys = keys;
658 this.values = values;
659 this.children = null;
660 }
661
662 private void setBranchData(Key[] keys, long[] nodeIds) {
663 this.keys = keys;
664 this.children = nodeIds;
665 this.values = null;
666 }
667
668 @SuppressWarnings("unchecked")
669 private Key[] createKeyArray(int size) {
670 return (Key[])new Object[size];
671 }
672
673 @SuppressWarnings("unchecked")
674 private Value[] createValueArray(int size) {
675 return (Value[])new Object[size];
676 }
677
678 @SuppressWarnings("unchecked")
679 static private <T> T[] arrayDelete(T[] vals, int idx) {
680 T[] newVals = (T[])new Object[vals.length - 1];
681 if (idx > 0) {
682 System.arraycopy(vals, 0, newVals, 0, idx);
683 }
684 if (idx < newVals.length) {
685 System.arraycopy(vals, idx + 1, newVals, idx, newVals.length - idx);
686 }
687 return newVals;
688 }
689
690 static private long[] arrayDelete(long[] vals, int idx) {
691 long[] newVals = new long[vals.length - 1];
692 if (idx > 0) {
693 System.arraycopy(vals, 0, newVals, 0, idx);
694 }
695 if (idx < newVals.length) {
696 System.arraycopy(vals, idx + 1, newVals, idx, newVals.length - idx);
697 }
698 return newVals;
699 }
700
701 @SuppressWarnings("unchecked")
702 static private <T> T[] arrayInsert(T[] vals, T val, int idx) {
703 T[] newVals = (T[])new Object[vals.length + 1];
704 if (idx > 0) {
705 System.arraycopy(vals, 0, newVals, 0, idx);
706 }
707 newVals[idx] = val;
708 if (idx < vals.length) {
709 System.arraycopy(vals, idx, newVals, idx + 1, vals.length - idx);
710 }
711 return newVals;
712 }
713
714
715 static private long[] arrayInsert(long[] vals, long val, int idx) {
716
717 long[] newVals = new long[vals.length + 1];
718 if (idx > 0) {
719 System.arraycopy(vals, 0, newVals, 0, idx);
720 }
721 newVals[idx] = val;
722 if (idx < vals.length) {
723 System.arraycopy(vals, idx, newVals, idx + 1, vals.length - idx);
724 }
725 return newVals;
726 }
727
728 ///////////////////////////////////////////////////////////////////
729 // Property Accessors
730 ///////////////////////////////////////////////////////////////////
731 private boolean isBranch() {
732 return children!=null;
733 }
734
735 public long getPageId() {
736 return page.getPageId();
737 }
738
739 public BTreeNode<Key, Value> getParent() {
740 return parent;
741 }
742
743 public void setParent(BTreeNode<Key, Value> parent) {
744 this.parent = parent;
745 }
746
747 public Page<BTreeNode<Key, Value>> getPage() {
748 return page;
749 }
750
751 public void setPage(Page<BTreeNode<Key, Value>> page) {
752 this.page = page;
753 }
754
755 public long getNext() {
756 return next;
757 }
758
759 public void setNext(long next) {
760 this.next = next;
761 }
762
763 @Override
764 public String toString() {
765 return "[BTreeNode "+(isBranch()?"branch":"leaf")+": "+Arrays.asList(keys)+"]";
766 }
767
768 }
769
770