我想使用向量来实现Trie来存储节点,但是以某种方式我的insert方法不起作用。我设法使用其他实现来构建trie数据结构,但是我想了解为什么当前的实现无法正常工作。
Works(不是基于索引的子代/引用存储):
struct Trie {
struct Trie *references[26];
bool end; //It is true if node represents end of word.
};
不工作(基于索引的子/引用存储):
struct node {
int references[26] = {0};
bool end;
};
由于插入功能错误,它不起作用。
void insert_word(string s){
node *current_node = &trie[0];
// current_node->references[4] = 9999 WORKS! Node in Trie is UPDATED
for(int i=0;i<s.size();i++){
print_trie();
int letter_num = static_cast<int>(tolower(s[i])) - static_cast<int>('a');
int next_index = current_node->references[letter_num];
cout << "letter num: " << letter_num << " next index: " << next_index << endl;
if(next_index == 0){
node new_node;
trie.push_back(new_node);
current_node->references[letter_num] = trie.size()-1; // DOESN'T WORK! Node in Trie is NOT UPDATED
cout << "new value: ";
for(auto c:current_node->references)
cout << c << " ";
cout << endl;
cout << "in for" << endl;
print_trie();
current_node = &trie.back();
} else{
current_node = &trie[next_index];
}
}
current_node->end = true;
}
问题是,当我访问current_node
作为对对象的引用时,trie向量,并且我更改了它的值。 trie向量中的对象/节点并非总是更新。它可以在第二行中工作,但在更深的地方它会停止工作。我想了解原因。
这里是我编写的简短调试程序,用于简化问题。在这里一切似乎都工作正常。
n1.references[0] = 1;
n2.references[0] = 2;
n3.references[0] = 3;
trie.push_back(n1);
trie.push_back(n2);
trie.push_back(n3);
node *n = &trie[0];
n->references[0] = 10; // Tree is updated properly
n = &trie[1];
n->references[0] = 11; // Tree is updated properly
您能帮我理解为什么插入功能无法正常工作吗?
编辑:最小的工作示例
#include <vector>
#include <string>
#include <iostream>
using namespace std;
struct node
{
int num_words;
int references [26] = {0};
bool end;
};
vector<node> trie;
int n;
void print_trie(){
cout << "#### NEW PRINT TRIE ##### " << endl;
for(int i=0;i<trie.size();i++){
cout << "node " << i << ": ";
for(int j=0;j<26;j++)
cout << trie[i].references[j] << " ";
cout << endl;
}
}
void insert_word(string s){
node *current_node = &trie[0];
// current_node->references[4] = 9999 WORKS! Node in Trie is UPDATED
for(int i=0;i<s.size();i++){
print_trie();
int letter_num = static_cast<int>(tolower(s[i])) - static_cast<int>('a');
int next_index = current_node->references[letter_num];
cout << "letter num: " << letter_num << " next index: " << next_index << endl;
if(next_index == 0){
node new_node;
trie.push_back(new_node);
current_node->references[letter_num] = trie.size()-1; // DOESN'T WORK! Node in Trie is NOT UPDATED
cout << "new reference value of node: ";
for(auto c:current_node->references)
cout << c << " ";
cout << endl;
current_node = &(trie[trie.size()-1]);
} else{
current_node = &trie[next_index];
}
}
current_node->end = true;
}
int main()
{
node root;
trie.push_back(root);
insert_word("hallohallo");
return 0;
}
每当std::vector<T>
进行大小调整操作时,所有迭代器和指向元素的指针都是invalidated。以您的mcve为例,考虑一下标记出的线条:
void insert_word(string s){
node *current_node = &trie[0]; // **HERE
for(int i=0;i<s.size();i++){
print_trie();
int letter_num = static_cast<int>(tolower(s[i])) - static_cast<int>('a');
int next_index = current_node->references[letter_num];
cout << "letter num: " << letter_num << " next index: " << next_index << endl;
if(next_index == 0){
node new_node;
trie.push_back(new_node); //** RESIZE
current_node->references[letter_num] = trie.size()-1;
cout << "new reference value of node: ";
for(auto c:current_node->references)
cout << c << " ";
cout << endl;
current_node = &(trie[trie.size()-1]); // **HERE
} else{
current_node = &trie[next_index]; // **HERE
}
}
current_node->end = true;
}
在标有// **HERE
的每个位置,您都存储了一个指向向量中托管对象的指针。但标记为// **RESIZE
的行可以(并且将)在达到容量后通过复制/移动/等方式调整整个矢量的大小。这意味着current_node
不再指向有效的对象,而是一个悬空的指针,但是您的代码没有那么明智,并继续前进到undefined behavior。
有两种解决方法。如果您提前知道容量,则可以从一开始就将其设为reserve
,但是对于更强大的解决方案,请不要使用指针作为开始。如果通过index而不是指针进行枚举,则解决方案如下:
void insert_word(std::string s)
{
size_t idx = 0;
for(int i=0;i<s.size();i++){
print_trie();
int letter_num = static_cast<int>(tolower(s[i])) - static_cast<int>('a');
size_t next_index = trie[idx].references[letter_num];
std::cout << "letter num: " << letter_num << " next index: " << next_index << std::endl;
if(next_index == 0){
trie.emplace_back();
trie[idx].references[letter_num] = trie.size()-1;
std::cout << "new reference value of node: ";
for(auto c : trie[idx].references)
std::cout << c << ' ';
std::cout << std::endl;
idx = trie.size()-1;
} else{
idx = next_index;
}
}
trie[idx].end = true;
}
[注意current_node
的所有实例如何被trie[idx]
替换。现在,更改“当前节点”仅是更改idx
的值,即使基础向量调整大小也很重要。
可能是由于类型不匹配而引起的int
被分配了size_t
尝试... = (int)trie.size()-1
#include <vector>
#include <iostream>
using namespace std;
struct node{
int num_words;
int references [26] = {}; //........... int
bool end;
};
vector<node> trie;
int n;
void print_trie(){
cout << "#### NEW PRINT TRIE ##### " << endl;
for(int i=0;i<trie.size();i++){
cout << "node " << i << ": ";
for(int j=0;j<26;j++)
cout << trie[i].references[j] << " ";
cout << endl;
}
}
void insert_word(const string& s){
node *current_node = &trie[0];
// current_node->references[4] = 9999 WORKS! Node in Trie is UPDATED
for(int i=0;i<s.size();i++){
print_trie();
int letter_num = int(tolower(s[i]) - 'a');
int next_index = current_node->references[letter_num];
cout << "letter num: " << letter_num << " next index: " << next_index << endl;
if(next_index == 0){
node new_node;
trie.push_back(new_node);
current_node->references[letter_num] = (int)trie.size()-1; //....size_t DOESN'T WORK! Node in Trie is NOT UPDATED
cout << "new reference value of node: ";
for(auto c:current_node->references)
cout << c << " ";
cout << endl;
current_node = &(trie[trie.size()-1]);
} else{
current_node = &trie[next_index];
}
}
current_node->end = true;
}
int main()
{
node root;
trie.push_back(root);
insert_word("hallohallo");
return 0;
}