在 C 语言中,为什么一个最多只存放 12 个字符的字符串,其大小却必须是 32(有时是 16)?

问题描述 投票:0回答:1

当我运行代码时,只要字符串大小(`STR_SIZE`)不是 `32`,就会出现段错误。我试过 `16`、`48` 和 `64`,使用这些值时都会遇到段错误或总线错误。有人可以向我解释一下这是为什么吗?

这是我的完整代码:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Investigate this weird behaviour!
// Str_size must for some reason be 32, or sometimes 16
// to not get a bus error, which makes no sense since we 
// are at most trying to put 12 chars inside of it. What are we
// missing?

// Capacity in bytes of each name buffer, including the terminating NUL.
#define STR_SIZE 32

/*
 * One row of the CSV file: two names and an integer weight.
 *
 * The typedef lets the type be spelled plain `Relationship` in C as well as
 * in C++; without it, C only knows the type as `struct Relationship`.
 */
typedef struct Relationship
{
    char source[STR_SIZE]; // first CSV column, NUL-terminated
    char target[STR_SIZE]; // second CSV column, NUL-terminated
    int weight;            // third CSV column, parsed as an integer
} Relationship;

int main()
{
    // Open file
    FILE *my_file = fopen("thrones.csv", "r");
    if (my_file == NULL) {
        puts("ERROR, file is empty!");
        return -1;
    }

    // Check file length
    int num_lines = 0;
    bool done = false;
    while (!done) {
        char c = getc(my_file);
        if (c == EOF) {
            done = true;
        }
        if (c == '\n') {
            num_lines++;
        }
    }

    //Size relationship array based on num_lines:
    Relationship relations[num_lines];

    // Replace all of the elements to 0
    for (int i = 0; i < num_lines; i++) {
        for (int j = 0; j < STR_SIZE; j++) {
            relations[i].source[j] = 0;
            relations[i].target[j] = 0;
        }
        relations[i].weight = 0;
    } 

    // Return to beginning of file
    fseek(my_file, 0, SEEK_SET);

    // Read and print out contents from file
    char buffer[32];
    int row = 0;
    int column = 0;
    while (fgets(buffer, 32, my_file) != NULL) {
        printf("Read %lu characters in line.\n", strlen(buffer));
        column = 0;

        // Splitting the data
        char *value = strtok(buffer, ",");

        while (value) {
            // Prints out source
            if (column == 0) {
                printf("Source: ");
                strcpy(relations[row].source, value);
                printf("%s", relations[row].source);
                // printf("(%lu)", strlen(value));
            }
            // Prints out target
            if (column == 1) {
                printf("\tTarget: ");
                strcpy(relations[row].target, value);
                printf("%s", relations[row].target);
                // printf("(%lu)", strlen(value));
            }
            // Prints out weight
            if (column == 2) {
                printf("\tWeight: ");
                relations[row].weight = atoi(value);
                printf("%d", relations[row].weight);
                // printf("(%lu)", strlen(value));
            }
            value = strtok(NULL, ",");
            column += 1;
        }
        printf("\n");
        row++;
    }
    fclose(my_file);
    return 0;
};

我的文件是一个csv文件,其中包括《权力的游戏》的名字、每个角色之间的关系以及他们见面的次数。

我想到的第一个、也是唯一一个看似有效(但实际上并没有解决问题)的思路是:这是内存问题——数组里充满了垃圾数据,没有任何有用的内容。

所以我这样做是为了解决这个问题:

// Zero every byte of both name buffers and the weight of each of the
// num_lines records, so no field is left uninitialized before parsing.
    for (int i = 0; i < num_lines; i++) {
        for (int j = 0; j < STR_SIZE; j++) {
            relations[i].source[j] = 0;
            relations[i].target[j] = 0;
        }
        relations[i].weight = 0;
    } 

thrones.csv 文件如下所示:

Aemon,Grenn,5
Aemon,Samwell,31
Aerys,Jaime,18
Aerys,Robert,6
Aerys,Tyrion,5
Aerys,Tywin,8
Alliser,Mance,5
Amory,Oberyn,5
Arya,Anguy,11
Arya,Beric,23
...

更新:原来我写的其实并不是 C,而是 C++,尽管我一直以为自己写的是 C。下面是可以在 C 中正常工作的更新版本:

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#define STR_SIZE 16

// One parsed CSV row: main() copies column 0 into source, column 1 into
// target, and stores the integer value of column 2 in weight.
typedef struct Relationship
{
    char source[STR_SIZE]; // NUL-terminated text of the first column
    char target[STR_SIZE]; // NUL-terminated text of the second column
    int weight;            // integer parsed from the third column
} Relationship;

/*
 * Reads "thrones.csv" (lines of the form source,target,weight), stores up to
 * MAX_RELATIONS rows in a fixed-size array and echoes the parsed fields to
 * stdout, prefixed by the row index.
 *
 * Returns 0 on success, or -1 if the file cannot be opened.
 */
int main(void)
{
    // Capacity of the record array; rows beyond this are not read.
    enum { MAX_RELATIONS = 352 };

    // Open file
    FILE *my_file = fopen("thrones.csv", "r");
    if (my_file == NULL)
    {
        // A NULL result means the file could not be opened, not that it is empty.
        puts("ERROR, could not open thrones.csv!");
        return -1;
    }

    // Zero-initialize every record (both name buffers and the weight).
    Relationship relations[MAX_RELATIONS] = {0};

    // Read and print out contents from file
    char buffer[32];
    int row = 0;
    // The row bound keeps a file with more than MAX_RELATIONS lines from
    // writing past the end of the array.
    while (row < MAX_RELATIONS && fgets(buffer, sizeof buffer, my_file) != NULL)
    {
        // A line longer than the buffer arrives truncated; discard its tail
        // so the remainder is not parsed as a separate row.
        if (strchr(buffer, '\n') == NULL)
        {
            int skipped;
            while ((skipped = getc(my_file)) != EOF && skipped != '\n')
            {
                // discard
            }
        }

        // Split on commas; the line terminator is a delimiter too so it never
        // ends up inside the last field.
        int column = 0;
        for (char *value = strtok(buffer, ",\r\n"); value != NULL;
             value = strtok(NULL, ",\r\n"), column++)
        {
            switch (column)
            {
            case 0:
                // snprintf() bounds the copy to the field and always
                // NUL-terminates, unlike strcpy().
                snprintf(relations[row].source, sizeof relations[row].source,
                         "%s", value);
                printf("%d Source: %s", row, relations[row].source);
                break;
            case 1:
                snprintf(relations[row].target, sizeof relations[row].target,
                         "%s", value);
                printf("\tTarget: %s", relations[row].target);
                break;
            case 2:
                relations[row].weight = atoi(value);
                printf("\tWeight: %d", relations[row].weight);
                break;
            default:
                // Extra columns are ignored.
                break;
            }
        }
        printf("\n");
        row++;
    }

    // Report truncation instead of silently dropping the remaining rows.
    if (row == MAX_RELATIONS && getc(my_file) != EOF)
    {
        fprintf(stderr, "WARNING, only the first %d rows were read.\n",
                MAX_RELATIONS);
    }

    fclose(my_file);
    return 0;
}
c string segmentation-fault char
1个回答
0
投票

当我根据您提供的数据运行您提供的 C++ 代码时,它不会崩溃,而是打印出以下内容:

fritz:~/tmp/cthrones$ gcc -o thrones thrones.cpp
fritz:~/tmp/cthrones$ ./thrones 
Read 14 characters in line.
Source: Aemon   Target: Grenn   Weight: 5
Read 17 characters in line.
Source: Aemon   Target: Samwell Weight: 31
Read 15 characters in line.
Source: Aerys   Target: Jaime   Weight: 18
Read 15 characters in line.
Source: Aerys   Target: Robert  Weight: 6
Read 15 characters in line.
Source: Aerys   Target: Tyrion  Weight: 5
Read 14 characters in line.
Source: Aerys   Target: Tywin   Weight: 8
Read 16 characters in line.
Source: Alliser Target: Mance   Weight: 5
Read 15 characters in line.
Source: Amory   Target: Oberyn  Weight: 5
Read 14 characters in line.
Source: Arya    Target: Anguy   Weight: 11
Read 13 characters in line.
Source: Arya    Target: ix/op5060-010:/tmp/.ICE-unix/6891   Weight: 23

打印的最后一个“目标:”是我的环境变量的一部分:

SESSION_MANAGER=local/op5060-010:@/tmp/.ICE-unix/6891,unix/op5060-010:/tmp/.ICE-unix/6891

这明显表明发生了越界访问。正如 @Oka 在评论中指出的那样,这是由统计文件行数时的差一错误(off-by-one error)引起的:只有当文件的最后一行以换行符 '\n' 结尾时,统计出的行数才是正确的。当我在文件末尾添加换行符后,错误就消失了。

这让我回到了最初的反应:不要使用 C/C++ 来做这类算法工作!尤其要避免 `strcpy` 和逐字节操作这类底层的东西。有太多细节必须完全正确,否则你几乎肯定会搬起石头砸自己的脚。如果必须使用 C++,请使用标准库中的抽象数据类型,例如 `std::string` 和 `std::vector`,而不是底层数组。更好的做法是:使用专为数据分析而设计的语言,例如搭配 pandas 和 numpy 的 Python。

© www.soinside.com 2019 - 2024. All rights reserved.