我想统计一个文本文件中唯一(不重复)单词的数量。但不知为何,得到的数字总是不对,而普通的单词总数统计是正确的。
我的字符串数组wordArr
包含文件中的所有单词。
我的做法是:每当确认某个单词是唯一的,就把它存入另一个数组;对每个单词,我都会遍历已经处理过的单词列表,看其中是否有与当前单词相同的。如果有匹配,我就将oldWord
设置为true,并且该单词不计入我的unique
计数。
//New portion
// Reads the file named by argv[1], splits its contents into words, prints the
// total word count and then attempts to print the number of distinct words.
// NOTE(review): argv[1] is used without checking argc.
int main(int argc, char *argv[]) {
//File Paths
ifstream fp;
fp.open(argv[1]);
if (fp.fail()) {
cout << "Error No file" << endl;
return 0;
}
// Fixed-capacity storage for every word extracted from the file.
string wordArr[10000];
string words;
string temp;
int wordCount = 0;
// Read whitespace-delimited tokens and split each one at non-alphabetic characters.
while (fp >> words) {
// Set to 1 once the token has been split, so it is not also stored whole below.
int newWord = 0;
// NOTE(review): i is declared without an initializer, so its starting value is indeterminate.
for (int i; i < words.length(); i++) {
if (isalpha(words[i])) {
} else {
// Store the part of the token that precedes the non-alphabetic character.
wordArr[wordCount++] = words.substr(0, i);
//wordCount++;
newWord = 1;
// NOTE(review): this adds 1 to the character's value and compares the sum with '\0';
// it does not look at words[i + 1].
if(words[i] + 1 != '\0') {
// Append everything after position i to temp.
// NOTE(review): temp is never cleared, so text appended for earlier tokens is still in it.
for (int j = i + 1; j < words.length(); j++) {
temp = temp +words[j];
}
wordArr[wordCount++] = temp;
//wordCount++;
}
}
}
// The token contained only alphabetic characters: store it unchanged.
if (newWord == 0) {
wordArr[wordCount] = words;
wordCount++;
}
}
cout << "Number of words found was: " << wordCount << endl;
//New portion
// makes all lower
// NOTE(review): the value returned by tolower is discarded, so wordArr is left unchanged.
for(int k=0; k<wordCount;k++){ //need to find size of array
for(int l=0; l<wordArr[k].length(); l++){
tolower(wordArr[k].at(l));
}
}
//unique count
// presumably tempArr is meant to hold the distinct words seen so far; nothing below writes to it
string tempArr[10000];
int unique=0;
// NOTE(review): oldWord is set to 1 on a match and is never reset to 0 inside the loop.
int oldWord=0;
for(int m=0; m<wordCount;m++ ) {
// Compare wordArr[m] against every tempArr entry.
for (int n = 0; n < wordCount; n++) {
if (wordArr[m] == tempArr[n]) {
oldWord = 1;
}
}
if(oldWord==0){
// NOTE(review): n is scoped to the inner for loop above and is not visible here;
// the assignment also overwrites wordArr[m] instead of storing into tempArr.
wordArr[m] = tempArr[n];
unique++;
}
}
cout << "Unique word count is: " << unique << endl;
}
我希望从我的测试用例中获得52个独特的单词,但最终只得到37个。
测试用例:
密码学是用于私下和安全地通信和/或存储信息或数据的技术的实践和研究,而不是被第三方拦截。这可以包括加密,散列和隐写等过程。直到现代,密码学几乎全部称为加密,但现在密码学是一个广泛的领域,应用于我们生活的许多关键领域。
您的解析代码存在逻辑错误(事实上,它甚至无法通过编译)。无论是在非字母字符处拆分单词的方式、查找和记录重复单词的方式,还是将单词转换为小写的方式,都有逻辑错误。
简而言之,这整个代码充满了需要修复的错误,例如:
#include <ctype.h>
#include <fstream>
#include <iostream>
#include <set>
#include <string>
#include <vector>
using namespace std;
// Splits one whitespace-delimited token into words and appends them to `out`.
//
// A word is a maximal run of alphabetic characters. Every other character acts
// as a separator and is dropped, so "and/or" yields "and" and "or", while a
// token made only of punctuation yields nothing (no empty "words" are stored).
// Each word is stored lower-cased so that later comparisons ignore case.
static void appendWords(const std::string &token, std::vector<std::string> &out) {
    std::string current;
    for (std::string::size_type i = 0; i < token.length(); ++i) {
        // The <ctype.h> classifiers are only defined for values representable
        // as unsigned char (or EOF), so convert before calling them.
        const unsigned char ch = static_cast<unsigned char>(token[i]);
        if (isalpha(ch)) {
            current += static_cast<char>(tolower(ch));
        } else if (!current.empty()) {
            out.push_back(current);
            current.clear();
        }
    }
    if (!current.empty()) {
        out.push_back(current);
    }
}

// Reads the text file named by argv[1] and prints the total number of words
// followed by the number of distinct words (compared case-insensitively).
//
// Prints "Error No file" and returns 0 when no path was supplied or the file
// cannot be opened.
int main(int argc, char *argv[]) {
    // Without this check argv[1] would be a null pointer when no path is given.
    if (argc < 2) {
        std::cout << "Error No file" << '\n';
        return 0;
    }

    //File Paths
    std::ifstream fp(argv[1]);
    if (!fp.is_open()) {
        std::cout << "Error No file" << '\n';
        return 0;
    }

    // A growable container removes the fixed 10000-word limit and the
    // out-of-bounds write that was possible once that limit was reached.
    std::vector<std::string> wordArr;
    std::string token;
    while (fp >> token) {
        appendWords(token, wordArr);
    }
    std::cout << "Number of words found was: " << wordArr.size() << '\n';

    //unique count
    // A set keeps one copy of each distinct word, so its size is the answer.
    const std::set<std::string> uniqueWords(wordArr.begin(), wordArr.end());
    std::cout << "Unique word count is: " << uniqueWords.size() << '\n';
    return 0;
}
现在代码可以按预期工作:
Number of words found was: 64 Unique word count is: 52
您需要在每次迭代中重置oldWord:
//unique count
// Fragment: wordArr and wordCount are declared outside the lines shown here.
// presumably tempArr is meant to hold the distinct words seen so far; nothing in this fragment writes to it
string tempArr[10000];
int unique=0;
int oldWord=0;
for(int m=0; m<wordCount;m++ ) {
// Compare wordArr[m] against every tempArr entry.
for (int n = 0; n < wordCount; n++) {
if (wordArr[m] == tempArr[n]) {
oldWord = 1;
}
}
if(oldWord==0){
// NOTE(review): n is scoped to the inner for loop above and is not visible here;
// the assignment also overwrites wordArr[m] instead of storing into tempArr.
wordArr[m] = tempArr[n];
unique++;
}
// reset the oldWord variable here
oldWord=0;
}
cout << "Unique word count is: " << unique << endl;
}