• B-Tree 的定义
  • 逻辑图
  • B-Tree in Go

    定义

  1. 树的每个节点不超过 m 个儿子(其中 m 为二叉树的阶数)
  2. 除根节点和叶子节点之外的节点有不少于 [m / 2] 个儿子
  3. 根节点至少有两个儿子(除非它本身就是一个叶子节点)
  4. 所有的非根节点必须包含以下信息:B-Tree(Balanced Tree) - 图1
    1. n 为关键字个数 B-Tree(Balanced Tree) - 图2
    2. B-Tree(Balanced Tree) - 图3 是关键字,从左到右升序排列
    3. B-Tree(Balanced Tree) - 图4 是指向子节点的指针,指向一个关键字都在 B-Tree(Balanced Tree) - 图5B-Tree(Balanced Tree) - 图6 之间的子树
  5. 所有的叶子节点都出现在同一层次上,并且不带任何信息(可以看做是外部节点,或者是查找失败节点,实际上,这些节点并不存在,指向这些节点的指针为空)

逻辑图

image.png

B-Tree in Go

代码来自:gods

B-Tree 的定义

  1. // Get searches the node in the tree by key and returns its value or nil if key is not found in tree.
  2. // Second return parameter is true if key was found, otherwise false.
  3. // Key should adhere to the comparator's type assertion, otherwise method panics.
  4. func (tree *Tree) Get(key interface{}) (value interface{}, found bool) {
  5. node, index, found := tree.searchRecursively(tree.Root, key)
  6. if found {
  7. return node.Entries[index].Value, true
  8. }
  9. return nil, false
  10. }
  11. // Tree holds elements of the B-tree
  12. type Tree struct {
  13. Root *Node // Root node
  14. Comparator utils.Comparator // Key comparator
  15. size int // Total number of keys in the tree
  16. m int // order (maximum number of children)
  17. }
  18. // Node is a single element within the tree
  19. type Node struct {
  20. Parent *Node
  21. Entries []*Entry // Contained keys in node
  22. Children []*Node // Children nodes
  23. }
  24. // Entry represents the key-value pair contained within nodes
  25. type Entry struct {
  26. Key interface{}
  27. Value interface{}
  28. }


  1. // searchRecursively searches recursively down the tree starting at the startNode
  2. func (tree *Tree) searchRecursively(startNode *Node, key interface{}) (node *Node, index int, found bool) {
  3. if tree.Empty() {
  4. return nil, -1, false
  5. }
  6. node = startNode
  7. for {
  8. index, found = tree.search(node, key)
  9. if found {
  10. return node, index, true
  11. }
  12. if tree.isLeaf(node) {
  13. return nil, -1, false
  14. }
  15. node = node.Children[index]
  16. }
  17. }
  18. // search searches only within the single node among its entries
  19. func (tree *Tree) search(node *Node, key interface{}) (index int, found bool) {
  20. low, high := 0, len(node.Entries)-1
  21. var mid int
  22. for low <= high {
  23. mid = (high + low) / 2
  24. compare := tree.Comparator(key, node.Entries[mid].Key)
  25. switch {
  26. case compare > 0:
  27. low = mid + 1
  28. case compare < 0:
  29. high = mid - 1
  30. case compare == 0:
  31. return mid, true
  32. }
  33. }
  34. return low, false
  35. }
  • 二分查找

这里弄清楚 []*Entry[]*Node 下标之间的关系即可。

前面省略了 Get 和 searchRecursively 方法,如果 searchRecursively 在 search 外面包了一层 for 循环,用来查找节点。

这个函数在 Put 和 Remove 的实现中也用到了,因为可以查找到节点的位置。一般来说,暴露出去的函数只有那么简单的几个,而内部有很多复杂的非暴露出去的函数,这些函数分层次,分别处理完一部分工作,然后调用下一层的函数。这些抽象出来的函数一定会有重复利用的价值,不然任何意义。在 gods 的源码中,出现了循环调用的情况,即上层调用下层,下层也调用上层,虽然不算什么错,但是我不喜欢。那么我想换一种方式来实现这种功能。

Put

通常,键的数量被选定在 d 和 2d 之间。其中 d 是键的最小数量, 是树最小的度或分支因子 。在实际中,键值占用了节点中大部分的空间。因数 2 将保证节点可以被拆分或组合。如果一个内部节点有 2d 个键,那么要添加一个键值给此节点,只需要拆分这 个键为 2 个 拥有 d 个键的节点,并把中间值节点移动到父节点。每一个拆分的节点需要拥有足够数目的键。相似地,如果一个内部节点和他的邻居两者都有 d 个键,那么将通过它与邻居的合并来删除一个键。删除此键将导致此节点拥有 d-1 个键;与邻居的合并则加上 d 个键,再加上从邻居节点的父节点移来的一个键值。结果为完全填充的 2d 个键。

——维基百科

代码有点长,我们来一步一步分析

  1. // 总体的 insert,里面调用了 tree 的两个方法,分别是 tree.insertIntoLeaf 和
  2. // tree.insertIntoInternal
  3. func (tree *Tree) insert(node *Node, entry *Entry) (inserted bool) {
  4. if tree.isLeaf(node) {
  5. return tree.insertIntoLeaf(node, entry)
  6. }
  7. return tree.insertIntoInternal(node, entry)
  8. }
  9. //
  10. func (tree *Tree) insertIntoLeaf(node *Node, entry *Entry) (inserted bool) {
  11. insertPosition, found := tree.search(node, entry.Key)
  12. if found {
  13. node.Entries[insertPosition] = entry
  14. return false
  15. }
  16. // Insert entry's key in the middle of the node
  17. node.Entries = append(node.Entries, nil)
  18. copy(node.Entries[insertPosition+1:], node.Entries[insertPosition:])
  19. node.Entries[insertPosition] = entry
  20. tree.split(node)
  21. return true
  22. }
  23. // insertIntoInternal
  24. func (tree *Tree) insertIntoInternal(node *Node, entry *Entry) (inserted bool) {
  25. insertPosition, found := tree.search(node, entry.Key)
  26. if found {
  27. node.Entries[insertPosition] = entry
  28. return false
  29. }
  30. return tree.insert(node.Children[insertPosition], entry)
  31. }

你会发现,最终都回到了叶子节点的插入上。

只留下了 insert 相关函数,逻辑关系
gods-b-tree-put.svg

splitNonRoot

  1. func (tree *Tree) splitNonRoot(node *Node) {
  2. middle := tree.middle()
  3. parent := node.Parent
  4. left := &Node{Entries: append([]*Entry(nil), node.Entries[:middle]...), Parent: parent}
  5. right := &Node{Entries: append([]*Entry(nil), node.Entries[middle+1:]...), Parent: parent}
  6. // Move children from the node to be split into left and right nodes
  7. if !tree.isLeaf(node) {
  8. left.Children = append([]*Node(nil), node.Children[:middle+1]...)
  9. right.Children = append([]*Node(nil), node.Children[middle+1:]...)
  10. setParent(left.Children, left)
  11. setParent(right.Children, right)
  12. }
  13. insertPosition, _ := tree.search(parent, node.Entries[middle].Key)
  14. // Insert middle key into parent
  15. parent.Entries = append(parent.Entries, nil)
  16. copy(parent.Entries[insertPosition+1:], parent.Entries[insertPosition:])
  17. parent.Entries[insertPosition] = node.Entries[middle]
  18. // Set child left of inserted key in parent to the created left node
  19. parent.Children[insertPosition] = left
  20. // Set child right of inserted key in parent to the created right node
  21. parent.Children = append(parent.Children, nil)
  22. copy(parent.Children[insertPosition+2:], parent.Children[insertPosition+1:])
  23. parent.Children[insertPosition+1] = right
  24. tree.split(parent)
  25. }

Remove

直接调用上面的 searchRecursively 即可,内部调用上面提到 search 方法找到删除节点。

  1. // Remove remove the node from the tree by key.
  2. // Key should adhere to the comparator's type assertion, otherwise method panics.
  3. func (tree *Tree) Remove(key interface{}) {
  4. node, index, found := tree.searchRecursively(tree.Root, key)
  5. if found {
  6. tree.delete(node, index)
  7. tree.size--
  8. }
  9. }

delete: 一开始我还以为是 Go 内置的函数 delete,仔细一看好像不是。删除某个 Entry 的时候,可能会导致树不再平衡,有些节点可能需要合并。

  1. // delete deletes an entry in node at entries' index
  2. // ref.: https://en.wikipedia.org/wiki/B-tree#Deletion
  3. func (tree *Tree) delete(node *Node, index int) {
  4. // deleting from a leaf node
  5. if tree.isLeaf(node) {
  6. deletedKey := node.Entries[index].Key
  7. tree.deleteEntry(node, index)
  8. tree.rebalance(node, deletedKey)
  9. if len(tree.Root.Entries) == 0 {
  10. tree.Root = nil
  11. }
  12. return
  13. }
  14. // deleting from an internal node
  15. leftLargestNode := tree.right(node.Children[index]) // largest node in the left sub-tree (assumed to exist)
  16. leftLargestEntryIndex := len(leftLargestNode.Entries) - 1
  17. node.Entries[index] = leftLargestNode.Entries[leftLargestEntryIndex]
  18. deletedKey := leftLargestNode.Entries[leftLargestEntryIndex].Key
  19. tree.deleteEntry(leftLargestNode, leftLargestEntryIndex)
  20. tree.rebalance(leftLargestNode, deletedKey)
  21. }
  22. // rebalance rebalances the tree after deletion if necessary and returns true, otherwise false.
  23. // Note that we first delete the entry and then call rebalance, thus the passed deleted key as reference.
  24. func (tree *Tree) rebalance(node *Node, deletedKey interface{}) {
  25. // check if rebalancing is needed
  26. if node == nil || len(node.Entries) >= tree.minEntries() {
  27. return
  28. }
  29. // try to borrow from left sibling
  30. leftSibling, leftSiblingIndex := tree.leftSibling(node, deletedKey)
  31. if leftSibling != nil && len(leftSibling.Entries) > tree.minEntries() {
  32. // rotate right
  33. node.Entries = append([]*Entry{node.Parent.Entries[leftSiblingIndex]}, node.Entries...) // prepend parent's separator entry to node's entries
  34. node.Parent.Entries[leftSiblingIndex] = leftSibling.Entries[len(leftSibling.Entries)-1]
  35. tree.deleteEntry(leftSibling, len(leftSibling.Entries)-1)
  36. if !tree.isLeaf(leftSibling) {
  37. leftSiblingRightMostChild := leftSibling.Children[len(leftSibling.Children)-1]
  38. leftSiblingRightMostChild.Parent = node
  39. node.Children = append([]*Node{leftSiblingRightMostChild}, node.Children...)
  40. tree.deleteChild(leftSibling, len(leftSibling.Children)-1)
  41. }
  42. return
  43. }
  44. // try to borrow from right sibling
  45. rightSibling, rightSiblingIndex := tree.rightSibling(node, deletedKey)
  46. if rightSibling != nil && len(rightSibling.Entries) > tree.minEntries() {
  47. // rotate left
  48. node.Entries = append(node.Entries, node.Parent.Entries[rightSiblingIndex-1]) // append parent's separator entry to node's entries
  49. node.Parent.Entries[rightSiblingIndex-1] = rightSibling.Entries[0]
  50. tree.deleteEntry(rightSibling, 0)
  51. if !tree.isLeaf(rightSibling) {
  52. rightSiblingLeftMostChild := rightSibling.Children[0]
  53. rightSiblingLeftMostChild.Parent = node
  54. node.Children = append(node.Children, rightSiblingLeftMostChild)
  55. tree.deleteChild(rightSibling, 0)
  56. }
  57. return
  58. }
  59. // merge with siblings
  60. if rightSibling != nil {
  61. // merge with right sibling
  62. node.Entries = append(node.Entries, node.Parent.Entries[rightSiblingIndex-1])
  63. node.Entries = append(node.Entries, rightSibling.Entries...)
  64. deletedKey = node.Parent.Entries[rightSiblingIndex-1].Key
  65. tree.deleteEntry(node.Parent, rightSiblingIndex-1)
  66. tree.appendChildren(node.Parent.Children[rightSiblingIndex], node)
  67. tree.deleteChild(node.Parent, rightSiblingIndex)
  68. } else if leftSibling != nil {
  69. // merge with left sibling
  70. entries := append([]*Entry(nil), leftSibling.Entries...)
  71. entries = append(entries, node.Parent.Entries[leftSiblingIndex])
  72. node.Entries = append(entries, node.Entries...)
  73. deletedKey = node.Parent.Entries[leftSiblingIndex].Key
  74. tree.deleteEntry(node.Parent, leftSiblingIndex)
  75. tree.prependChildren(node.Parent.Children[leftSiblingIndex], node)
  76. tree.deleteChild(node.Parent, leftSiblingIndex)
  77. }
  78. // make the merged node the root if its parent was the root and the root is empty
  79. if node.Parent == tree.Root && len(tree.Root.Entries) == 0 {
  80. tree.Root = node
  81. node.Parent = nil
  82. return
  83. }
  84. // parent might underflow, so try to rebalance if necessary
  85. tree.rebalance(node.Parent, deletedKey)
  86. }

看起来好复杂,于是我将它画成一张图

Others

其中迭代器的设计也很优秀,就像 C++ 标准库中的那样,一部分人开发数据结构,一部分人开发算法,通过迭代器来连接。

Reference

  • B-Tree

  • [ ] 迭代器的实现

  • B+ 树的实现