当我尝试运行代码时,只要字符串大小(STR_SIZE)不是 32,我就会收到分段错误。我尝试过 16、48、64,使用这些值时都会遇到分段错误或总线错误。有人可以向我解释一下这是为什么吗?
这是我的完整代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Investigate this weird behaviour!
// Str_size must for some reason be 32, or sometimes 16
// to not get a bus error, which makes no sense since we
// are at most trying to put 12 chars inside of it. What are we
// missing?
#define STR_SIZE 32
// One parsed row of the CSV file: a weighted link between two names.
// main() fills one of these per line read from the file.
struct Relationship
{
// First CSV column, stored as a NUL-terminated string
// (room for at most STR_SIZE - 1 characters plus the terminator).
char source[STR_SIZE];
// Second CSV column, same storage rules as `source`.
char target[STR_SIZE];
// Third CSV column converted to an integer.
int weight;
};
int main()
{
// Open file
FILE *my_file = fopen("thrones.csv", "r");
if (my_file == NULL) {
puts("ERROR, file is empty!");
return -1;
}
// Check file length
int num_lines = 0;
bool done = false;
while (!done) {
char c = getc(my_file);
if (c == EOF) {
done = true;
}
if (c == '\n') {
num_lines++;
}
}
//Size relationship array based on num_lines:
Relationship relations[num_lines];
// Replace all of the elements to 0
for (int i = 0; i < num_lines; i++) {
for (int j = 0; j < STR_SIZE; j++) {
relations[i].source[j] = 0;
relations[i].target[j] = 0;
}
relations[i].weight = 0;
}
// Return to beginning of file
fseek(my_file, 0, SEEK_SET);
// Read and print out contents from file
char buffer[32];
int row = 0;
int column = 0;
while (fgets(buffer, 32, my_file) != NULL) {
printf("Read %lu characters in line.\n", strlen(buffer));
column = 0;
// Splitting the data
char *value = strtok(buffer, ",");
while (value) {
// Prints out source
if (column == 0) {
printf("Source: ");
strcpy(relations[row].source, value);
printf("%s", relations[row].source);
// printf("(%lu)", strlen(value));
}
// Prints out target
if (column == 1) {
printf("\tTarget: ");
strcpy(relations[row].target, value);
printf("%s", relations[row].target);
// printf("(%lu)", strlen(value));
}
// Prints out weight
if (column == 2) {
printf("\tWeight: ");
relations[row].weight = atoi(value);
printf("%d", relations[row].weight);
// printf("(%lu)", strlen(value));
}
value = strtok(NULL, ",");
column += 1;
}
printf("\n");
row++;
}
fclose(my_file);
return 0;
};
我的文件是一个csv文件,其中包括《权力的游戏》的名字、每个角色之间的关系以及他们见面的次数。
我最初想到的、也是唯一一个看似有效但实际上并没有解决问题的想法是:这是内存问题——数组里充满了垃圾值,没有任何有用的内容。
所以我这样做是为了解决这个问题:
// Replace all of the elements to 0
for (int i = 0; i < num_lines; i++) {
for (int j = 0; j < STR_SIZE; j++) {
relations[i].source[j] = 0;
relations[i].target[j] = 0;
}
relations[i].weight = 0;
}
thrones.csv 文件如下所示:
Aemon,Grenn,5
Aemon,Samwell,31
Aerys,Jaime,18
Aerys,Robert,6
Aerys,Tyrion,5
Aerys,Tywin,8
Alliser,Mance,5
Amory,Oberyn,5
Arya,Anguy,11
Arya,Beric,23
...
后来我才发现,我实际上并不是在用 C 编写,而是在用 C++ 编写(尽管我以为自己用的是 C)。下面是可以在 C 中正常工作的更新版本:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#define STR_SIZE 16
// One parsed row of the CSV file: a weighted link between two names.
// The typedef lets C code refer to the type as plain `Relationship`.
typedef struct Relationship
{
// First CSV column, stored as a NUL-terminated string
// (room for at most STR_SIZE - 1 characters plus the terminator).
char source[STR_SIZE];
// Second CSV column, same storage rules as `source`.
char target[STR_SIZE];
// Third CSV column converted to an integer.
int weight;
} Relationship;
/*
 * Reads "thrones.csv" (lines of the form source,target,weight) into a
 * fixed-size table of Relationship records and echoes each parsed row to
 * stdout. Rows beyond the table's capacity are not stored; a warning is
 * written to stderr when that happens.
 *
 * Returns 0 on success, -1 if the file cannot be opened.
 */
int main()
{
    // Capacity of the record table (previously a repeated magic number).
    enum { MAX_RELATIONS = 352 };

    // Open file
    FILE *my_file = fopen("thrones.csv", "r");
    if (my_file == NULL)
    {
        // A NULL stream means the open failed, not that the file is empty.
        puts("ERROR, could not open thrones.csv!");
        return -1;
    }

    // Every record starts out zeroed.
    Relationship relations[MAX_RELATIONS] = {0};

    // Read and print out contents from file
    char buffer[32];
    int row = 0;
    // The row bound keeps a file with more lines than the table holds from
    // writing past the end of the array.
    while (row < MAX_RELATIONS && fgets(buffer, sizeof buffer, my_file) != NULL)
    {
        // fgets() delivers a line longer than the buffer in several pieces.
        // Discard the tail so that one input line always maps to one record.
        if (strchr(buffer, '\n') == NULL)
        {
            int ch;
            while ((ch = getc(my_file)) != EOF && ch != '\n')
            {
                // skip
            }
        }

        // Split the line; the line terminator counts as a delimiter so it
        // never ends up inside a stored field.
        int column = 0;
        char *value = strtok(buffer, ",\r\n");
        while (value != NULL)
        {
            if (column == 0)
            {
                // Source name; snprintf() truncates instead of overflowing.
                printf("%d Source: ", row);
                snprintf(relations[row].source, sizeof relations[row].source,
                         "%s", value);
                printf("%s", relations[row].source);
            }
            else if (column == 1)
            {
                // Target name
                printf("\tTarget: ");
                snprintf(relations[row].target, sizeof relations[row].target,
                         "%s", value);
                printf("%s", relations[row].target);
            }
            else if (column == 2)
            {
                // Weight
                printf("\tWeight: ");
                relations[row].weight = atoi(value);
                printf("%d", relations[row].weight);
            }
            value = strtok(NULL, ",\r\n");
            column += 1;
        }
        printf("\n");
        row++;
    }

    // Report truncation instead of dropping rows silently.
    if (row == MAX_RELATIONS && getc(my_file) != EOF)
    {
        fputs("WARNING, file has more rows than the table can hold; the rest were ignored.\n", stderr);
    }

    fclose(my_file);
    return 0;
}
当我根据您提供的数据运行您提供的 C++ 代码时,它不会崩溃,而是打印出以下内容:
fritz:~/tmp/cthrones$ gcc -o thrones thrones.cpp
fritz:~/tmp/cthrones$ ./thrones
Read 14 characters in line.
Source: Aemon Target: Grenn Weight: 5
Read 17 characters in line.
Source: Aemon Target: Samwell Weight: 31
Read 15 characters in line.
Source: Aerys Target: Jaime Weight: 18
Read 15 characters in line.
Source: Aerys Target: Robert Weight: 6
Read 15 characters in line.
Source: Aerys Target: Tyrion Weight: 5
Read 14 characters in line.
Source: Aerys Target: Tywin Weight: 8
Read 16 characters in line.
Source: Alliser Target: Mance Weight: 5
Read 15 characters in line.
Source: Amory Target: Oberyn Weight: 5
Read 14 characters in line.
Source: Arya Target: Anguy Weight: 11
Read 13 characters in line.
Source: Arya Target: ix/op5060-010:/tmp/.ICE-unix/6891 Weight: 23
打印的最后一个“目标:”是我的环境变量的一部分:
SESSION_MANAGER=local/op5060-010:@/tmp/.ICE-unix/6891,unix/op5060-010:/tmp/.ICE-unix/6891
有明显迹象表明发生了越界访问。正如 @Oka 在评论中指出的那样,这是由统计文件行数时的差一错误(off-by-one error)引起的:如果文件的最后一行没有以换行符('\n')结尾,统计出的行数就会比实际少一行。当我在文件末尾添加换行符后,错误就消失了。
这让我回到了最初的反应:不要使用 C/C++ 进行这样的算法工作!特别是不要使用 strcpy 这类低级函数和逐字节操作。有太多事情需要你完全做对,否则你几乎肯定会搬起石头砸自己的脚。如果您必须使用 C++,请使用标准库中的抽象数据类型,例如 std::string 和 std::vector,而不是低级数组。更好的做法是:使用专为数据分析而设计的语言,例如带有 pandas 和 numpy 的 Python。