C - 序列化后二进制文件比原始文件大得多

问题描述 投票:0回答:1

我的大学课程要求编写一个把 CSV 文件序列化为二进制文件的程序。程序整体运行正常,但我注意到新生成的二进制文件有一个奇怪的现象——它的大小几乎是原始文件的两倍。我尝试了多种方法来解决,但都没有效果。我猜这与把 Firm 结构体写入二进制文件的方式有关,但我不太确定,因为我还是 C 语言编程的新手。

这是代码本身:

我用来描述 CSV 文件中每条记录的结构:

typedef struct {
    unsigned int rowID;
    char orderID[20];
    char orderDate[11];
    char customer[11];
    char city[30];
    char state[30];
    unsigned int postalCode;
    char region[15];
    char productID[18];
    char category[35];
    char subCategory[25];
    float price;

} Firm;

序列化过程:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "firm-structure.h"

#define MAX_LINE_LENGTH 1024

/*
 * Copy `token` into the fixed-size buffer `field`, truncating if necessary.
 *
 * field - destination buffer of `size` bytes.
 * token - NUL-terminated source text; NULL is treated as an empty string.
 * size  - total capacity of `field` in bytes, including the terminator.
 *
 * At most size-1 characters are copied and every remaining byte of `field`
 * is set to '\0', so the result is always NUL-terminated and the whole
 * buffer has a defined content (it is later written to disk byte for byte).
 * Nothing is done when `field` is NULL or `size` is 0; without that guard
 * `size - 1` would wrap around to SIZE_MAX.
 */
void ReduceFieldSize(char *field, char *token, size_t size){
    size_t length;

    if (field == NULL || size == 0) {
        return;
    }

    /* Zero the whole field first: gives the terminator and the padding. */
    memset(field, 0, size);

    if (token == NULL) {
        return;
    }

    length = strlen(token);
    if (length > size - 1) {
        length = size - 1;
    }
    memcpy(field, token, length);
}

/* Store a text token in `field`; a missing (NULL) token leaves it untouched. */
static void CopyTextField(char *field, size_t size, char *token){
    if (token != NULL) {
        ReduceFieldSize(field, token, size);
    }
}

/* Parse a decimal token as unsigned int; a missing (NULL) token yields 0. */
static unsigned int ParseUnsignedField(const char *token){
    return token != NULL ? (unsigned int)strtoul(token, NULL, 10) : 0u;
}

/*
 * Split one CSV line into the members of *pFirm.
 *
 * pLine - writable, NUL-terminated line; strtok() overwrites its commas.
 * pFirm - record to fill; it is zeroed first, so fields missing from a
 *         short line end up as 0 / empty strings instead of being read
 *         through a NULL token pointer.
 *
 * Fields are expected in this order: rowID, orderID, orderDate, customer,
 * city, state, postalCode, region, productID, category, subCategory, price.
 *
 * Limitations of splitting with strtok(): consecutive commas are treated as
 * one separator (an empty field shifts all later fields), commas inside
 * quoted text are not recognised, and surrounding quotes are kept as part
 * of the stored text. strtok() also keeps hidden static state, so this
 * function is not reentrant.
 */
void Tokenization(char *pLine, Firm *pFirm){
    char *token;

    if (pLine == NULL || pFirm == NULL) {
        return;
    }

    /* Defined content for every byte, including fields that are missing. */
    memset(pFirm, 0, sizeof *pFirm);

    pFirm->rowID = ParseUnsignedField(strtok(pLine, ","));

    CopyTextField(pFirm->orderID, sizeof(pFirm->orderID), strtok(NULL, ","));
    CopyTextField(pFirm->orderDate, sizeof(pFirm->orderDate), strtok(NULL, ","));
    CopyTextField(pFirm->customer, sizeof(pFirm->customer), strtok(NULL, ","));
    CopyTextField(pFirm->city, sizeof(pFirm->city), strtok(NULL, ","));
    CopyTextField(pFirm->state, sizeof(pFirm->state), strtok(NULL, ","));

    pFirm->postalCode = ParseUnsignedField(strtok(NULL, ","));

    CopyTextField(pFirm->region, sizeof(pFirm->region), strtok(NULL, ","));
    CopyTextField(pFirm->productID, sizeof(pFirm->productID), strtok(NULL, ","));
    CopyTextField(pFirm->category, sizeof(pFirm->category), strtok(NULL, ","));
    CopyTextField(pFirm->subCategory, sizeof(pFirm->subCategory), strtok(NULL, ","));

    /* Last field; strtod() stops at the trailing newline left by fgets(). */
    token = strtok(NULL, ",");
    pFirm->price = token != NULL ? (float)strtod(token, NULL) : 0.0f;
}

/*
 * Convert a CSV stream into the binary output stream.
 *
 * csvFileHolder    - input stream, positioned at the CSV header line.
 * binaryFileHolder - output stream.
 *
 * The first line (the header) is copied to the output unchanged as text.
 * Every following non-blank line is parsed with Tokenization() and written
 * as one raw Firm structure, i.e. sizeof(Firm) bytes per record regardless
 * of how long the CSV line was.
 *
 * The function returns early, without writing anything further, when a
 * stream is NULL, when the input has no header line, or when a write fails.
 * Lines longer than MAX_LINE_LENGTH-1 characters are split by fgets() and
 * are therefore not handled correctly.
 */
void Serialize(FILE *csvFileHolder, FILE *binaryFileHolder){
    char currentLine[MAX_LINE_LENGTH];
    size_t headerLength;

    if (csvFileHolder == NULL || binaryFileHolder == NULL) {
        return;
    }

    /* An empty input would otherwise leave currentLine uninitialised. */
    if (fgets(currentLine, sizeof currentLine, csvFileHolder) == NULL) {
        return;
    }

    headerLength = strlen(currentLine);
    if (headerLength > 0 &&
        fwrite(currentLine, headerLength, 1, binaryFileHolder) != 1) {
        return;
    }

    while (fgets(currentLine, sizeof currentLine, csvFileHolder) != NULL) {
        Firm firm;

        /* Skip blank lines: they contain no fields to tokenize. */
        if (currentLine[strspn(currentLine, "\r\n")] == '\0') {
            continue;
        }

        /* Zero the record so padding bytes written to disk are defined. */
        memset(&firm, 0, sizeof firm);
        Tokenization(currentLine, &firm);

        if (fwrite(&firm, sizeof firm, 1, binaryFileHolder) != 1) {
            return;
        }
    }
}

CSV文件本身有如下格式(记录例子):

1,"CA-2016-152156",11/8/2016,"CG-12520","Henderson","Kentucky",42420,"South","FUR-BO-10001798","Furniture","Bookcases",261.96

给定文件中的记录总数为 9994.

任何帮助将不胜感激!提前谢谢你!

c csv serialization binaryfiles
1个回答
0
投票

你的结构体效率低下(浪费空间)。

例子:

    char city[30];
    char state[30];

在二进制文件中,每个城市和州各占 30 字节。而在 CSV 文件中,它们只占用文本实际长度的字节数。

CSV 文件中的“Pila,Poland”是 11 个字节(包括逗号)- 在您的二进制文件中它们将占用 60 个字节-几乎是 6 倍。

© www.soinside.com 2019 - 2024. All rights reserved.