为什么我的碰撞次数这么高? (C++ 哈希表)

问题描述 投票:0回答:1

我正在用哈希表做作业。哈希表函数已经完成,我们只需实现哈希表重新哈希大小计数、负载因子计算、冲突计数器和拼写检查。除了碰撞计数器之外,一切都已完成,由于某种原因,碰撞计数器比我应该得到的要高得多。 (我们的预期输出为 42802,我得到 55535)。我唯一能想到的是哈希表使用延迟删除,所以也许我没有正确考虑这一点?我不知道。这里分别是头文件和cpp文件。

#ifndef QUADRATIC_PROBING_CPP
#define QUADRATIC_PROBING_CPP

#include <iostream>
#include "QuadraticProbing.h"
using namespace std;


template <class HashedObj>
HashTable<HashedObj>::HashTable( int size )
  : array( nextPrime( size ) )
{
  makeEmpty();
}

template <class HashedObj>
void HashTable<HashedObj>::makeEmpty( )
{
  
  currentSize = 0;
  for( int i = 0; i < array.size( ); i++ )
    {
      array[ i ].info = EMPTY;
    }
}

template<class HashedObj>
double HashTable<HashedObj>::loadFactor() const{
  return static_cast<double>(currentSize) / array.size();
}

template <class HashedObj>
bool HashTable<HashedObj>::isPrime( int n )
{
    if( n == 2 || n == 3 )
        return true;

    if( n == 1 || n % 2 == 0 )
        return false;

    for( int i = 3; i * i <= n; i += 2 )
        if( n % i == 0 )
            return false;

    return true;
}

template <class HashedObj>
int HashTable<HashedObj>::nextPrime( int n )
{
    if( n % 2 == 0 )
        n++;

    for( ; !isPrime( n ); n += 2 )
        ;

    return n;
}


template <class HashedObj>
bool HashTable<HashedObj>::contains( const HashedObj & x ) const
{
  return isActive( findPos( x ) );
}


template <class HashedObj>
bool HashTable<HashedObj>::insert( const HashedObj & x, int &rehashCounter, int &sizeIncrease, int &collisionCount)
{
  
  // Insert x as active
  int currentPos = findPos( x );
  if( isActive( currentPos ) )
    return false;
  
  array[ currentPos ].element = x;
  array[ currentPos ].info = ACTIVE;

  // Rehash
  if( ++currentSize > array.size( ) / 2 ){
    
    int oldSize = array.size();
    vector<HashEntry> oldArray = array;
    rehash(rehashCounter, sizeIncrease, collisionCount);
    int newSize = array.size();

    for (int i = 0; i < oldSize; i++){
    int newPos = findPos(oldArray[i].element);
    if (oldArray[i].info == ACTIVE && newPos != currentPos) {
        // Check if the element from the old array collided with a different element
        collisionCount++;
    }
  }
    sizeIncrease += newSize - oldSize;
    cout << "Rehashing. New size is " << sizeIncrease << endl;
  }

  return true;
}

template <class HashedObj>
bool HashTable<HashedObj>::remove( const HashedObj & x )
{
  int currentPos = findPos( x );
  if( !isActive( currentPos ) )
    return false;
  
  array[ currentPos ].info = DELETED;
  return true;
}


template <class HashedObj>
bool HashTable<HashedObj>::isActive( int currentPos ) const
{
  return array[ currentPos ].info == ACTIVE;
}

template <class HashedObj>
int HashTable<HashedObj>::findPos( const HashedObj & x ) const
{
  int offset = 1;
  int currentPos = myhash( x );

  while( array[ currentPos ].info != EMPTY &&
     array[ currentPos ].element != x )
    {
      currentPos += offset;  // Compute ith probe
      offset += 2;
      if( currentPos >= array.size( ) )
    currentPos -= array.size( );
    }

  return currentPos;
}

template <class HashedObj>
void HashTable<HashedObj>::rehash(int &counter, int &sizeIncrease, int &collisionCount)
{
  
  vector<HashEntry> oldArray = array;
  int oldSize = array.size();

  // Create new double-sized, empty table
  array.resize( nextPrime( 2 * oldArray.size( ) ) );
  for( int j = 0; j < array.size( ); j++ )
    array[ j ].info = EMPTY;
  
  
  // Copy table over
  currentSize = 0;
  for( int i = 0; i < oldArray.size( ); i++ )
    if( oldArray[ i ].info == ACTIVE )
      insert(oldArray[ i ].element, counter, sizeIncrease, collisionCount);
}



template <class HashedObj>
int HashTable<HashedObj>::myhash( const HashedObj & x ) const
{
  int hashVal= hashKey(x); 
  
  hashVal %= array.size( );
  if( hashVal < 0 )
    hashVal += array.size( );
  
  return hashVal;
}

template <class HashedObj>
int HashTable<HashedObj>::hashKey( int key) const
{
  return key;
}

template <class HashedObj>
int HashTable<HashedObj>::hashKey(const string & key) const
{
  int hashVal = 0;

  for( int i = 0; i < key.length( ); i++ )
    hashVal = 37 * hashVal + key[ i ];
  
  return hashVal;
  
}


#endif

#ifndef QUADRATIC_PROBING_H
#define QUADRATIC_PROBING_H

#include <vector>
#include <string>
using namespace std;



enum EntryType { ACTIVE, EMPTY, DELETED };

// QuadraticProbing Hash table class
//
// CONSTRUCTION: an approximate initial size or default of 101
//
// ******************PUBLIC OPERATIONS*********************
// bool insert( x )       --> Insert x
// bool remove( x )       --> Remove x
// bool contains( x )     --> Return true if x is present
// void makeEmpty( )      --> Remove all items
// int hash( string str ) --> Global method to hash strings

template <class HashedObj>
class HashTable
{
public:

  //constuctor, default size: 101
  HashTable(int size = 101);

  //Return true if x is present
  bool contains( const HashedObj & x ) const;

  //Remove all items from the hash table
  void makeEmpty( );
    
  //insert x
  bool insert( const HashedObj & x, int &rehashCounter, int &sizeIncrease, int &collisionCount);

  //remove x
  bool remove( const HashedObj & x );

  double loadFactor() const;

  
private:
  //hash table entry
  struct HashEntry
  {
    HashedObj element;
    EntryType info;
  };


  //hash table
  vector<HashEntry> array;

  //size of the hash table
  int currentSize;

  int collisionCount;

  //check to see if the given position is active
  bool isActive( int currentPos ) const;

  //find the position of given x
  int findPos( const HashedObj & x ) const;

  //rehash
  void rehash(int &counter, int &sizeIncrease, int &collisionCount);

  //return the hash table index of x
  int myhash( const HashedObj & x ) const;

  //find the next prime greater than n
  int nextPrime( int n );

  //check if n is prime
  bool isPrime( int n );

  //compute the hash key when x is integer
  int hashKey(int key) const;

  //compute the hash key when x is a string
  int hashKey(const string & key) const;

  
};

#include "QuadraticProbing.cpp"

#endif

#include <iostream>
#include <iomanip>
#include "QuadraticProbing.h"
#include <string>
#include <fstream>
using namespace std;

// Simple main
int main( )
{
  std::cout << std::setprecision(2);
  int rehashCount = 0;
  int sizeIncrease = 101;
  int collisionCount = 0;
  int wordLength;
  string word, wordOriginal;
  bool check1 = false;
  bool check2 = false;
  bool check3 = false;
  vector<string> s;
  HashTable<string> table3;
  char alpha[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
                'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};
  string myText;
  char c = 0;

  cout << "Insert words\n";
  std::ifstream myReadFile("words.dat");
  while(std::getline(myReadFile, myText)){
    table3.insert(myText, rehashCount, sizeIncrease, collisionCount);
  }

  cout << "\nThe load factor is " << table3.loadFactor() << "." << endl;
  cout << "Collision count: " << collisionCount << endl;

  
  
  for (char &c : word) { //Convert to lowercase for search
    c = tolower(c);
  }

  while(true){

  s.clear();

  cout << "\nEnter a word for spell checking (enter done to exit):  ";
  cin >> word;

  if(word.find("done") != std::string::npos){
    break;
  }
  
  if(table3.contains(word))
  {
    cout << "Correct! " << endl;
    break;
  }
    //logic when word not found, and word is just missing character
    wordLength = word.length();

    wordOriginal = word;
    for(int j = 0; j < 25; j++){

      word = wordOriginal;

      word.insert(0, 1, alpha[j]);

      for (int i = 0; i < wordLength; i++){

        c = word[i];
        word[i] = word[i + 1];
        word[i + 1]= c;
        if(table3.contains(word)){
          s.push_back(word);
          break;
        } 
      }
  }
  
    //logic for when word not found, and word has extra character
    word = wordOriginal;
    for(int i = 0; i < wordLength; i++){
      word = word.erase(i, 1);
      if(table3.contains(word)){
        s.push_back(word);
        
      }
      word = wordOriginal;
      
    }
  
  
    word = wordOriginal;
    int counter = 0;
  
  for(int i = 0; i < wordLength; i++){
    word = wordOriginal;
    
    for(int j=0; j < 25; j++){
      word = word.replace(i, 1, 1, alpha[j]);
      if(table3.contains(word)){
        
        s.push_back(word);
        
      }
    }
  }
  cout << "Suggestions: \n";
  for (const auto& element : s){
    cout << element << " ";
  }

  

  }
  cout << "Bye. ";
  
}

c++ hashtable collision
1个回答
0
投票

弄清楚了:我数错了地方。在

collisionCount
函数的
while
循环中添加
findPos
给出了正确的计数。

template <class HashedObj>
int HashTable<HashedObj>::findPos( const HashedObj & x ) const
{
  int offset = 1;
  int currentPos = myhash( x );

  while( array[ currentPos ].info != EMPTY &&
     array[ currentPos ].element != x )
    {
      currentPos += offset;  // Compute ith probe
      offset += 2;
      collisionCount++;
      if( currentPos >= array.size( ) ){
        currentPos -= array.size( );
      }
    }
  
  return currentPos;
}
© www.soinside.com 2019 - 2024. All rights reserved.