我正在用哈希表做作业。哈希表函数已经完成,我们只需实现哈希表重新哈希大小计数、负载因子计算、冲突计数器和拼写检查。除了碰撞计数器之外,一切都已完成,由于某种原因,碰撞计数器比我应该得到的要高得多。 (我们的预期输出为 42802,我得到 55535)。我唯一能想到的是哈希表使用延迟删除,所以也许我没有正确考虑这一点?我不知道。这里分别是头文件和cpp文件。
#ifndef QUADRATIC_PROBING_CPP
#define QUADRATIC_PROBING_CPP
#include <iostream>
#include "QuadraticProbing.h"
using namespace std;
template <class HashedObj>
HashTable<HashedObj>::HashTable( int size )
: array( nextPrime( size ) )
{
makeEmpty();
}
template <class HashedObj>
void HashTable<HashedObj>::makeEmpty( )
{
currentSize = 0;
for( int i = 0; i < array.size( ); i++ )
{
array[ i ].info = EMPTY;
}
}
template<class HashedObj>
double HashTable<HashedObj>::loadFactor() const{
return static_cast<double>(currentSize) / array.size();
}
template <class HashedObj>
bool HashTable<HashedObj>::isPrime( int n )
{
if( n == 2 || n == 3 )
return true;
if( n == 1 || n % 2 == 0 )
return false;
for( int i = 3; i * i <= n; i += 2 )
if( n % i == 0 )
return false;
return true;
}
template <class HashedObj>
int HashTable<HashedObj>::nextPrime( int n )
{
if( n % 2 == 0 )
n++;
for( ; !isPrime( n ); n += 2 )
;
return n;
}
template <class HashedObj>
bool HashTable<HashedObj>::contains( const HashedObj & x ) const
{
return isActive( findPos( x ) );
}
template <class HashedObj>
bool HashTable<HashedObj>::insert( const HashedObj & x, int &rehashCounter, int &sizeIncrease, int &collisionCount)
{
// Insert x as active
int currentPos = findPos( x );
if( isActive( currentPos ) )
return false;
array[ currentPos ].element = x;
array[ currentPos ].info = ACTIVE;
// Rehash
if( ++currentSize > array.size( ) / 2 ){
int oldSize = array.size();
vector<HashEntry> oldArray = array;
rehash(rehashCounter, sizeIncrease, collisionCount);
int newSize = array.size();
for (int i = 0; i < oldSize; i++){
int newPos = findPos(oldArray[i].element);
if (oldArray[i].info == ACTIVE && newPos != currentPos) {
// Check if the element from the old array collided with a different element
collisionCount++;
}
}
sizeIncrease += newSize - oldSize;
cout << "Rehashing. New size is " << sizeIncrease << endl;
}
return true;
}
template <class HashedObj>
bool HashTable<HashedObj>::remove( const HashedObj & x )
{
int currentPos = findPos( x );
if( !isActive( currentPos ) )
return false;
array[ currentPos ].info = DELETED;
return true;
}
template <class HashedObj>
bool HashTable<HashedObj>::isActive( int currentPos ) const
{
return array[ currentPos ].info == ACTIVE;
}
template <class HashedObj>
int HashTable<HashedObj>::findPos( const HashedObj & x ) const
{
int offset = 1;
int currentPos = myhash( x );
while( array[ currentPos ].info != EMPTY &&
array[ currentPos ].element != x )
{
currentPos += offset; // Compute ith probe
offset += 2;
if( currentPos >= array.size( ) )
currentPos -= array.size( );
}
return currentPos;
}
template <class HashedObj>
void HashTable<HashedObj>::rehash(int &counter, int &sizeIncrease, int &collisionCount)
{
vector<HashEntry> oldArray = array;
int oldSize = array.size();
// Create new double-sized, empty table
array.resize( nextPrime( 2 * oldArray.size( ) ) );
for( int j = 0; j < array.size( ); j++ )
array[ j ].info = EMPTY;
// Copy table over
currentSize = 0;
for( int i = 0; i < oldArray.size( ); i++ )
if( oldArray[ i ].info == ACTIVE )
insert(oldArray[ i ].element, counter, sizeIncrease, collisionCount);
}
template <class HashedObj>
int HashTable<HashedObj>::myhash( const HashedObj & x ) const
{
int hashVal= hashKey(x);
hashVal %= array.size( );
if( hashVal < 0 )
hashVal += array.size( );
return hashVal;
}
template <class HashedObj>
int HashTable<HashedObj>::hashKey( int key) const
{
return key;
}
template <class HashedObj>
int HashTable<HashedObj>::hashKey(const string & key) const
{
int hashVal = 0;
for( int i = 0; i < key.length( ); i++ )
hashVal = 37 * hashVal + key[ i ];
return hashVal;
}
#endif
#ifndef QUADRATIC_PROBING_H
#define QUADRATIC_PROBING_H
#include <vector>
#include <string>
using namespace std;
enum EntryType { ACTIVE, EMPTY, DELETED };
// QuadraticProbing Hash table class
//
// CONSTRUCTION: an approximate initial size or default of 101
//
// ******************PUBLIC OPERATIONS*********************
// bool insert( x ) --> Insert x
// bool remove( x ) --> Remove x
// bool contains( x ) --> Return true if x is present
// void makeEmpty( ) --> Remove all items
// int hash( string str ) --> Global method to hash strings
template <class HashedObj>
class HashTable
{
public:
//constuctor, default size: 101
HashTable(int size = 101);
//Return true if x is present
bool contains( const HashedObj & x ) const;
//Remove all items from the hash table
void makeEmpty( );
//insert x
bool insert( const HashedObj & x, int &rehashCounter, int &sizeIncrease, int &collisionCount);
//remove x
bool remove( const HashedObj & x );
double loadFactor() const;
private:
//hash table entry
struct HashEntry
{
HashedObj element;
EntryType info;
};
//hash table
vector<HashEntry> array;
//size of the hash table
int currentSize;
int collisionCount;
//check to see if the given position is active
bool isActive( int currentPos ) const;
//find the position of given x
int findPos( const HashedObj & x ) const;
//rehash
void rehash(int &counter, int &sizeIncrease, int &collisionCount);
//return the hash table index of x
int myhash( const HashedObj & x ) const;
//find the next prime greater than n
int nextPrime( int n );
//check if n is prime
bool isPrime( int n );
//compute the hash key when x is integer
int hashKey(int key) const;
//compute the hash key when x is a string
int hashKey(const string & key) const;
};
#include "QuadraticProbing.cpp"
#endif
#include <iostream>
#include <iomanip>
#include "QuadraticProbing.h"
#include <string>
#include <fstream>
using namespace std;
// Simple main
int main( )
{
std::cout << std::setprecision(2);
int rehashCount = 0;
int sizeIncrease = 101;
int collisionCount = 0;
int wordLength;
string word, wordOriginal;
bool check1 = false;
bool check2 = false;
bool check3 = false;
vector<string> s;
HashTable<string> table3;
char alpha[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};
string myText;
char c = 0;
cout << "Insert words\n";
std::ifstream myReadFile("words.dat");
while(std::getline(myReadFile, myText)){
table3.insert(myText, rehashCount, sizeIncrease, collisionCount);
}
cout << "\nThe load factor is " << table3.loadFactor() << "." << endl;
cout << "Collision count: " << collisionCount << endl;
for (char &c : word) { //Convert to lowercase for search
c = tolower(c);
}
while(true){
s.clear();
cout << "\nEnter a word for spell checking (enter done to exit): ";
cin >> word;
if(word.find("done") != std::string::npos){
break;
}
if(table3.contains(word))
{
cout << "Correct! " << endl;
break;
}
//logic when word not found, and word is just missing character
wordLength = word.length();
wordOriginal = word;
for(int j = 0; j < 25; j++){
word = wordOriginal;
word.insert(0, 1, alpha[j]);
for (int i = 0; i < wordLength; i++){
c = word[i];
word[i] = word[i + 1];
word[i + 1]= c;
if(table3.contains(word)){
s.push_back(word);
break;
}
}
}
//logic for when word not found, and word has extra character
word = wordOriginal;
for(int i = 0; i < wordLength; i++){
word = word.erase(i, 1);
if(table3.contains(word)){
s.push_back(word);
}
word = wordOriginal;
}
word = wordOriginal;
int counter = 0;
for(int i = 0; i < wordLength; i++){
word = wordOriginal;
for(int j=0; j < 25; j++){
word = word.replace(i, 1, 1, alpha[j]);
if(table3.contains(word)){
s.push_back(word);
}
}
}
cout << "Suggestions: \n";
for (const auto& element : s){
cout << element << " ";
}
}
cout << "Bye. ";
}
弄清楚了:我数错了地方。在
collisionCount
函数的 while
循环中添加 findPos
给出了正确的计数。
template <class HashedObj>
int HashTable<HashedObj>::findPos( const HashedObj & x ) const
{
int offset = 1;
int currentPos = myhash( x );
while( array[ currentPos ].info != EMPTY &&
array[ currentPos ].element != x )
{
currentPos += offset; // Compute ith probe
offset += 2;
collisionCount++;
if( currentPos >= array.size( ) ){
currentPos -= array.size( );
}
}
return currentPos;
}