我正在实现一个B树插入功能。
但是它会遇到问题。对于插入 50 个及以下的元素,它似乎工作正常,但插入 100 个元素会产生错误:
向量下标超出范围
这是我尝试在程序中实现的代码和调试输出:
#include <iostream>
#include <vector>
#include <algorithm>
#include <queue>
#include <stack>
#include <ctime>
using namespace std;
struct Node {
vector<int>key;
vector<Node*>children;
Node* parent = NULL;
int index = 0;
bool is_leaf()
{
if (children.size() == 0)
{
return true;
}
return false;
}
Node(const vector<int>©vect)
{
key = copyvect;
}
Node(int data)
{
key.push_back(data);
}
Node()
{
}
};
int binarySearch(vector<int>&arr, int x)
{
int l = 0;
int r = arr.size() - 1;
if (arr.size() == 0)
{
return -1;
}
int mid = 0;
while (l <= r)
{
mid = (l + r) / 2;
if (arr[mid] < x)
{
l = mid + 1;
}
else if (arr[mid] > x)
{
r = mid - 1;
}
else {
return mid;
}
}
return mid;
}
Node* insert_node(Node* root, int key, int order)
{
int minkey = order / 2 - 1;
int maxkey = order - 1;
int maxchild = order;
if (root->key.size() == 0)
{
root->key.push_back(key);
return root;
}
else
{
Node* p = root;
int searchindex = 0;
while (!p->is_leaf())
{
searchindex = binarySearch(p->key, key);
cout << "searchindex: " << searchindex << endl;
if (p->key[searchindex] < key)
{
p->children[searchindex + 1]->index = searchindex + 1;
p = p->children[searchindex + 1];
}
else
{
p->children[searchindex]->index = searchindex;
p = p->children[searchindex];
}
}
int searchindex2 = binarySearch(p->key, key);
cout << "searchindex2: " << searchindex2 << endl;
if (p->key[searchindex2] < key)
{
p->key.insert(p->key.begin() + searchindex2 + 1, key);
}
else
{
p->key.insert(p->key.begin() + searchindex2, key);
}
while (p != NULL)
{
if (p->key.size() == maxkey)
{
int medianIndex = p->key.size() / 2;
int mediankey = p->key[medianIndex];
Node* newNode = new Node;
newNode->parent = p->parent;
newNode->key.assign(p->key.begin(), p->key.begin() + medianIndex);
cout << "newnode key size: " << newNode->key.size() << endl;
p->key.erase(p->key.begin(), p->key.begin() + medianIndex + 1);
cout << "p->key size after erase: " << p->key.size() << endl;
if (!p->is_leaf())
{
newNode->children.assign(p->children.begin(), p->children.begin() + p->children.size() / 2 + 1);
cout << "newNode children size: " << newNode->children.size() << endl;
p->children.erase(p->children.begin(), p->children.begin() + p->children.size() / 2 + 1);
cout << "p children size after erase: " << p->children.size() << endl;
}
if (p->parent != NULL)
{
cout << "index is: " << p->index << endl;
p->parent->children.insert(p->parent->children.begin() + p->index, newNode);
cout << "p->parent->children size: " << p->parent->children.size() << endl;
p->parent->key.insert(p->parent->key.begin() + p->index, mediankey);
cout << "p->parent->key size: " << p->parent->key.size() << endl;
}
else
{
Node* newParent = new Node(mediankey);
newParent->children.push_back(newNode);
newParent->children.push_back(p);
newNode->parent = newParent;
p->parent = newParent;
return newParent;
}
}
if (p->parent == NULL)
{
return p;
}
p = p->parent;
}
return root;
}
}
void levelorder(Node* root)
{
if (root == nullptr)
return;
queue<Node*> q;
q.push(root);
while (!q.empty())
{
int countNode = q.size();
while (countNode > 0)
{
Node* current = q.front();
q.pop();
for (int i = 0; i < current->key.size(); i++)
{
cout << current->key[i] << " ";
if (!current->is_leaf())
{
q.push(current->children[i]);
if (i == current->key.size() - 1)
{
q.push(current->children.back());
}
}
}
cout << " ";
countNode--;
}
cout << '\n';
}
}
int main()
{
srand(time(NULL));
int n;
cin >> n;
Node* root = new Node;
for (int i = 0; i < n; i++)
{
root = insert_node(root, rand(), 6);
}
cout << '\n';
levelorder(root);
}
我尝试通过打印所有可能导致此错误的内容来调试它。但检查后,我不知道为什么会这样。
这是输出:
searchindex2: 0
searchindex2: 1
searchindex2: 2
searchindex2: 3
newnode key size: 2
p->key size after erase: 2
searchindex: 0
searchindex2: 1
searchindex: 0
searchindex2: 2
searchindex: 0
searchindex2: 0
newnode key size: 2
p->key size after erase: 2
index is: 1
p->parent->children size: 3
p->parent->key size: 2
searchindex: 1
searchindex2: 0
searchindex: 1
searchindex2: 1
searchindex: 1
searchindex2: 2
searchindex: 1
searchindex2: 2
searchindex: 1
searchindex2: 2
newnode key size: 2
p->key size after erase: 2
index is: 2
p->parent->children size: 4
p->parent->key size: 3
searchindex: 0
searchindex2: 3
newnode key size: 2
p->key size after erase: 2
index is: 1
p->parent->children size: 5
p->parent->key size: 4
searchindex: 3
searchindex2: 1
searchindex: 3
searchindex2: 1
searchindex: 0
searchindex2: 0
searchindex: 3
searchindex2: 2
searchindex: 2
searchindex2: 0
searchindex: 0
searchindex2: 1
searchindex: 3
searchindex2: 3
newnode key size: 2
p->key size after erase: 2
index is: 3
p->parent->children size: 6
p->parent->key size: 5
newnode key size: 2
p->key size after erase: 2
newNode children size: 4
p children size after erase: 2
searchindex: 0
searchindex: 1
我做错了什么?
有几个问题,但在讨论它们之前,我想提出一些建议以更有效地调试:
为了使调试输出更加“直观”,我会编写一个函数,可以以指示哪些键出现在哪些节点中以及哪些节点是哪些其他节点的子节点的方式打印树。一旦您获得具有三层或更多层的树,
levelOrder
生成的输出就会对这些方面产生一些疑问。你很容易迷失在这个输出中。然而你需要看看结构是什么。一种简单的输出格式是打印旋转 90° 的树,以便根节点出现在左侧(列),其子节点出现在右侧,等等。例如,对以下输出进行成像:
98
96
95
94
89
86
77
76
75
74
73
72
71
69
67
66
64
63
56
55
50
42
41
40
24
20
19
18
17
15
所以这里根节点有两个键:50 和 73。它有三个子节点。第一个孩子有键 19 和 40。第二个孩子有键 63 和 67,根的最后一个孩子有键 77 和 95。你明白了...
这是一个函数:
// Helper function that gets a second argument that helps indenting the output
void printTree(Node *current, string tab) {
for (int i = current->key.size(); i >=0; i--) {
if (!current->is_leaf()) {
printTree(current->children[i], tab + " ");
}
if (i == 0) break;
cout << tab << current->key[i-1] << "\n";
}
}
void printTree(Node *root) {
cout << "TREE:\n";
printTree(root, "");
cout << "\n";
}
其次,您需要在每次插入后检查树的一致性。您可以检查以下内容:
如果您在树应处于一致状态的时刻调用这样的函数,您将尽早发现错误。
您可以使用以下函数来实现此目的:
#include <climits>
// Helper function that gets the window of allowed keys and expected parent
void verifyTree(Node *current, int low, int high, Node *parent) {
// Verify parent back-reference
if (current->parent != parent) {
cout << "parent inconsistency at " << current->key[0] << " and its parent\n";
exit(1);
}
bool isLeaf = current->is_leaf();
int numKeys = current->key.size();
// Verify this node has the expected number of children
int expectedChildren = isLeaf ? 0 : numKeys + 1;
if (expectedChildren != current->children.size()) {
cout << "inconcisitency in number of children at parent of "
<< current->key[0] << ". Expected " << expectedChildren
<< " but got " << current->children.size() << "\n";
exit(1);
}
// Verify the order of the keys
int prevKey = low;
for (int i = 0; i < numKeys; i++) {
int key = current->key[i];
if (key < prevKey) {
cout << "wrong key order: " << prevKey << " and " << key << "\n";
exit(1);
}
if (!isLeaf) {
verifyTree(current->children[i], prevKey, key, current);
}
prevKey = key;
}
if (!isLeaf) {
verifyTree(current->children[numKeys], prevKey, high, current);
}
}
void verifyTree(Node *root) {
verifyTree(root, INT_MIN, INT_MAX, nullptr);
}
最后,要找到出现问题的较小输入,不要传递 6 作为
order
参数的值,而应传递 4。并且不要产生如此大的整数,但限制为两位数(使用 rand() % 100
) .
通过此配置,很快发现只有 9 个值的输入会触发不一致,例如这些输入值:
6, 3, 5, 4, 1, 8, 7, 2, 9
通过上述功能,可以更轻松地查明问题出在哪里:
当内部节点分裂时,您的代码移动到新节点的子节点数量是错误的(当
p->children.size()
为偶数时),移动一个子节点太多。要纠正此错误,请更改以下语句:
newNode->children.assign(p->children.begin(), p->children.begin() + p->children.size() / 2 + 1);
p->children.erase(p->children.begin(), p->children.begin() + p->children.size() / 2 + 1);
至:
newNode->children.assign(p->children.begin(), p->children.begin() + medianIndex + 1);
p->children.erase(p->children.begin(), p->children.begin() + medianIndex + 1);
在与上面代码相同的部分,您引入了子父级不一致,因为移动到
newNode
的子级仍将其先前的父级作为 parent
引用。这些应该更新。所以在上面提到的代码下面添加这段代码:
for (auto child : newNode->children) {
child->parent = newNode;
}
这些错误修复足以使其正常运行。
不过,还有一些其他的说明:
binarySearch
有点奇怪,因为它只能返回给定向量 (0..𝑛−1) 的索引,这迫使您仍然检查返回索引处的键是否小于键您想要插入并选择该索引处的子项或下一个子项。让 binarySearch
返回一个可能是 𝑛 的值会更有用,始终确保索引可用于选择子项。这很容易做到:只需将最后的 return m;
替换为 return l;
,然后就可以简化调用 binarySearch
后的代码。
变量
minkey
和 maxchild
已定义但从未使用。这些可以删除。
insert_node
是一个对其用途具有误导性的名称。它插入一个key,这可能会也可能不会导致插入一个或多个新节点。
您的代码在
p->key.size() == maxkey
时执行拆分,但这意味着您实际上正在构建 order-1
树。仅当 p->key.size() > maxkey
时才应分割节点。
将树搜索的“路径”存储在节点的
index
字段中并不是那么优雅:这些索引具有临时性质,但它们在节点中可用,没有任何用途。
实际上并不需要在每个节点中维护一个
parent
字段。如果使用递归实现插入(和删除)算法,则可以使用调用堆栈来维护这些关系。这也将是跟踪沿路径(上一点)使用的索引的解决方案。