C - 将 CSV 文件读取为浮点数 (float) 数组

问题描述 投票:0回答:2

这不是一个问题。这是未来观看者的解决方案。

我在这里提供一段把 CSV 文件读取为浮点数 (float) 矩阵的代码。如果有人通过谷歌搜索到这里,它可能会有所帮助。

不过,由于我只是一名本科生,这段代码在安全性和内存泄漏方面可能并不完美。如果减少指针的间接访问、让数据对缓存更友好,它的性能也可以更高。

但是,它确实有效,而且总比没有好,

如果您想重构这段代码,或者说明如何把它写得更好,请随意——这样才能最大限度地发挥 Stack Overflow 的作用。

data.csv

0.1,0.2,0.3,0.4,0.5
1.1,1.2,1.3,1.4,1.5
2.1,2.2,2.3,2.4,2.5
3.1,3.2,3.3,3.3,3.5
4.1,4.2,4.3,4.4,4.5

输出:

------------START OF PROGRAM------------
Filesize: 99 Bytes

CSV_content =
[ 0.100000 0.200000 0.300000 0.400000 0.500000 ]
[ 1.100000 1.200000 1.300000 1.400000 1.500000 ]
[ 2.100000 2.200000 2.300000 2.400000 2.500000 ]
[ 3.100000 3.200000 3.300000 3.300000 3.500000 ]
[ 4.100000 4.200000 4.300000 4.400000 4.500000 ]

------------END OF PROGRAM------------
arrays c csv multidimensional-array file-read
2个回答
0
投票

旧答案:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>


// Nominal limit on the number of lines (tokens) handled per split.
// NOTE(review): openCSV_float() also hands this value to splitString() as a
// size argument — confirm which meaning is intended.
const unsigned long MAX_NUMBER_OF_LINE = 128;
// Size, in bytes, that openCSV_float() passes to getStringMatrix() as maxElementSize.
const int MAX_LENGTH_OF_STRING_ELEMENT = 16;
// The C string terminator, named for readability.
const char NULL_TERMINATOR = '\0';
// Line separator; main() builds the line-delimiter string from it.
// NOTE(review): the identifier is misspelled ("CHARARACTER"); renaming it
// would require touching every user.
const char NEW_LINE_CHARARACTER = '\n';

// A list of separately allocated C strings (released by freeStringArray()).
typedef struct
{
    int numberOfString; // number of entries in data
    char **data;        // stringArray->data[i] is a string.
} StringArray;

// A table of strings: data[i][j] is cell j of row i (released by freeStringMatrix()).
typedef struct
{
    char ***data;            // data[i] is one row, an array of numberOfString strings
    int numberOfStringArray; // number of rows
    int numberOfString; // number of strings (columns) in every row
} StringMatrix;

// Read a whole file into a freshly allocated, NUL-terminated buffer.
//
// filename         path of the file; it is opened in binary mode.
// printFileBool    when 1, every byte of the buffer (terminator included) is
//                  printed to stdout as a decimal number followed by a space.
// fileSize_output  receives the number of bytes read, or 0 on failure.
//
// Returns the buffer (the caller must free() it), or NULL when the file cannot
// be opened, measured or read, or when memory runs out. Diagnostics go to stderr.
char *readCSV_toString(char *filename, int printFileBool, unsigned long *fileSize_output)
{
    char *fileContent = NULL;
    long endPosition = 0;
    unsigned long fileSize = 0;

    if (fileSize_output != NULL)
        *fileSize_output = 0; // defined value on every failure path

    FILE *filePointer = fopen(filename, "rb");
    if (filePointer == NULL)
    {
        fprintf(stderr, "Can't Open File\n");
        return NULL;
    }

    // ftell() reports failure as -1, so keep the value signed until checked.
    if (fseek(filePointer, 0L, SEEK_END) != 0)
        goto fail;
    endPosition = ftell(filePointer);
    if (endPosition < 0)
        goto fail;
    rewind(filePointer);

    fileSize = (unsigned long)endPosition;
    fileContent = malloc((size_t)fileSize + 1); // +1 for the terminator
    if (fileContent == NULL)
        goto fail;

    // A short read would leave uninitialised bytes in front of the terminator.
    if (fread(fileContent, 1, (size_t)fileSize, filePointer) != (size_t)fileSize)
        goto fail;
    fileContent[fileSize] = '\0';

    if (printFileBool == 1)
    {
        for (unsigned long i = 0; i <= fileSize; i++)
        {
            printf("%i ", fileContent[i]);
        }
    }

    fclose(filePointer);
    if (fileSize_output != NULL)
        *fileSize_output = fileSize;
    return fileContent;

fail:
    fprintf(stderr, "Can't Read File\n");
    free(fileContent);
    fclose(filePointer);
    return NULL;
}

// Count the pieces that `string` falls into when cut at delimiter[0].
//
// string      NUL-terminated text to inspect (not modified).
// stringSize  highest index that may be inspected; scanning also stops at the
//             terminator, so a bound larger than the string is harmless.
// delimiter   only its first character is used.
//
// A delimiter that is the last character of the string does not start a new
// piece (so a trailing newline does not create an empty last line).
// Returns the number of delimiters counted plus one.
int splitLength(char *string, unsigned long stringSize, char *delimiter)
{
    int length = 0;
    // Stopping at the terminator keeps both string[i] and string[i + 1]
    // inside the string even when stringSize overstates its length.
    for (unsigned long i = 0; i <= stringSize && string[i] != '\0'; i++)
    {
        if (string[i] == delimiter[0] && string[i + 1] != '\0')
        {
            length++;
        }
    }
    return length + 1; // because there (a,b,c,d): 3 commas, 4 number. Hence +1
}

// Split `string` into tokens, like string.split(delimiter).
//
// string          NUL-terminated text to split; it is not modified.
// delimiter       set of separator characters (same meaning as for strtok():
//                 every character is a separator, runs of separators count
//                 once, leading and trailing separators are ignored).
// maxElementSize  size limit, terminator included, of each stored token;
//                 longer tokens are truncated to maxElementSize - 1 characters.
//
// Returns a new StringArray whose numberOfString is exactly the number of
// tokens found (0 with data == NULL when there are none). Every string and the
// array itself are heap allocated; release them with freeStringArray().
// Returns NULL on invalid arguments or when memory runs out.
StringArray *splitString(char *string, char *delimiter, int maxElementSize)
{
    if (string == NULL || delimiter == NULL || maxElementSize <= 0)
        return NULL;

    // First pass: count the tokens with the same rule used to extract them,
    // so the pointer array can never be too small.
    int tokenCount = 0;
    const char *scan = string + strspn(string, delimiter);
    while (*scan != '\0')
    {
        tokenCount++;
        scan += strcspn(scan, delimiter);
        scan += strspn(scan, delimiter);
    }

    StringArray *stringArray = calloc(1, sizeof *stringArray); // the big thing
    if (stringArray == NULL)
        return NULL;
    if (tokenCount == 0)
        return stringArray;

    stringArray->data = calloc((size_t)tokenCount, sizeof *stringArray->data); // The array of pointers.
    if (stringArray->data == NULL)
    {
        free(stringArray);
        return NULL;
    }

    // Second pass: copy each token into its own terminated buffer.
    const size_t maxCharacters = (size_t)maxElementSize - 1;
    const char *cursor = string + strspn(string, delimiter);
    for (int i = 0; i < tokenCount; i++)
    {
        size_t tokenLength = strcspn(cursor, delimiter);
        size_t copyLength = tokenLength < maxCharacters ? tokenLength : maxCharacters;

        char *element = malloc(copyLength + 1);
        if (element == NULL)
        {
            for (int k = 0; k < i; k++)
                free(stringArray->data[k]);
            free(stringArray->data);
            free(stringArray);
            return NULL;
        }
        memcpy(element, cursor, copyLength);
        element[copyLength] = '\0';

        stringArray->data[i] = element;
        stringArray->numberOfString = i + 1;

        cursor += tokenLength;
        cursor += strspn(cursor, delimiter);
    }

    return stringArray;
}

// Print the array on one line as [ 'a', 'b', 'c' ] followed by a newline.
void printStringArray(StringArray stringArray)
{
    const int count = stringArray.numberOfString;
    int index = 0;

    printf("[ ");
    while (index < count)
    {
        printf("'%s'", stringArray.data[index]);
        index++;
        // A comma follows every entry except the last one.
        if (index != count)
            printf(",");
        printf(" ");
    }
    printf("]\n");
}

// Release a StringArray returned by splitString() together with every string
// it still owns (entries already moved elsewhere must have been set to NULL).
static void discardSplitRow(StringArray *cells)
{
    if (cells == NULL)
        return;
    if (cells->data != NULL)
    {
        for (int j = 0; j < cells->numberOfString; j++)
            free(cells->data[j]);
        free(cells->data);
    }
    free(cells);
}

// Build a table of cell strings from an array of lines.
//
// stringArray_input  one string per line of the CSV file.
// delimiterElement   separator between the cells of a line; it is used for
//                    every row.
// maxElementSize     kept for interface compatibility. Rows are split with a
//                    bound derived from their own length, so no cell is truncated.
//
// The column count is taken from the first row. Cells missing from a shorter
// row become empty strings and surplus cells of a longer row are dropped, so
// every data[i][j] with j < numberOfString is a valid heap string.
//
// Returns a matrix to be released with freeStringMatrix(), or NULL on invalid
// arguments or when memory runs out.
StringMatrix *getStringMatrix(StringArray *stringArray_input, char *delimiterElement, int maxElementSize)
{
    (void)maxElementSize;

    if (stringArray_input == NULL || delimiterElement == NULL)
        return NULL;

    StringMatrix *stringMatrix = calloc(1, sizeof *stringMatrix);
    if (stringMatrix == NULL)
        return NULL;

    int rowCount = stringArray_input->numberOfString;
    if (rowCount <= 0 || stringArray_input->data == NULL)
        return stringMatrix; // empty matrix: 0 rows, 0 columns, data == NULL

    stringMatrix->data = calloc((size_t)rowCount, sizeof *stringMatrix->data);
    if (stringMatrix->data == NULL)
    {
        free(stringMatrix);
        return NULL;
    }
    stringMatrix->numberOfStringArray = rowCount;

    int columnCount = 0;
    for (int i = 0; i < rowCount; i++)
    {
        char *row = stringArray_input->data[i];
        StringArray *cells = NULL;

        // Only split rows that contain at least one non-delimiter character.
        if (row != NULL && row[strspn(row, delimiterElement)] != '\0')
        {
            // A cell cannot be longer than its row, so this size never truncates.
            cells = splitString(row, delimiterElement, (int)strlen(row) + 1);
            if (cells == NULL)
                goto fail;
        }

        if (i == 0)
        {
            columnCount = (cells != NULL && cells->data != NULL) ? cells->numberOfString : 0;
            stringMatrix->numberOfString = columnCount;
        }

        stringMatrix->data[i] = calloc(columnCount > 0 ? (size_t)columnCount : 1, sizeof *stringMatrix->data[i]);
        if (stringMatrix->data[i] == NULL)
        {
            discardSplitRow(cells);
            goto fail;
        }

        for (int j = 0; j < columnCount; j++)
        {
            char *cell = NULL;
            if (cells != NULL && cells->data != NULL && j < cells->numberOfString)
            {
                cell = cells->data[j]; // take ownership of the string
                cells->data[j] = NULL; // so discardSplitRow() will not free it
            }
            if (cell == NULL)
                cell = calloc(1, sizeof *cell); // missing cell -> empty string
            if (cell == NULL)
            {
                discardSplitRow(cells);
                goto fail;
            }
            stringMatrix->data[i][j] = cell;
        }

        // Frees the temporary struct, its pointer array and any surplus cells.
        discardSplitRow(cells);
    }
    return stringMatrix;

fail:
    for (int i = 0; i < rowCount; i++)
    {
        if (stringMatrix->data[i] == NULL)
            continue;
        for (int j = 0; j < columnCount; j++)
            free(stringMatrix->data[i][j]);
        free(stringMatrix->data[i]);
    }
    free(stringMatrix->data);
    free(stringMatrix);
    return NULL;
}

// Convert every cell of a string matrix to float.
//
// stringMatrix  table of numeric strings; a NULL cell is read as 0.
// m_output      receives the number of columns (numberOfString).
// n_output      receives the number of rows (numberOfStringArray).
//
// Returns a heap array of n * m floats in row-major order (element (i, j) is at
// index i * m + j) that the caller must free(), or NULL when memory runs out.
// Cells that do not start with a number convert to 0, as atof() does.
float *stringToFloat_matrix(StringMatrix *stringMatrix, int *m_output, int *n_output)
{
    int n = stringMatrix->numberOfStringArray; // vertical (list of list)
    int m = stringMatrix->numberOfString;      // horizontal (last layer)

    *m_output = m;
    *n_output = n;

    size_t rowCount = n > 0 ? (size_t)n : 0;
    size_t columnCount = m > 0 ? (size_t)m : 0;

    // Refuse sizes whose byte count would not fit in size_t.
    if (columnCount != 0 && rowCount > (size_t)-1 / sizeof(float) / columnCount)
        return NULL;

    // Allocate at least one element so that NULL always means failure.
    size_t cellCount = rowCount * columnCount;
    float *array1D = calloc(cellCount > 0 ? cellCount : 1, sizeof *array1D);
    if (array1D == NULL)
        return NULL;

    for (size_t i = 0; i < rowCount; i++)
    {
        for (size_t j = 0; j < columnCount; j++)
        {
            const char *cell = stringMatrix->data[i][j];
            if (cell != NULL)
                array1D[i * columnCount + j] = (float)atof(cell);
        }
    }

    return array1D;
}

// Release every string of the array, then the pointer array, then the struct.
void freeStringArray(StringArray *stringArray)
{
    int remaining = stringArray->numberOfString;
    char **entry = stringArray->data;

    // Walk the pointer array front to back, freeing one string per step.
    while (remaining-- > 0)
    {
        free(*entry);
        entry++;
    }
    free(stringArray->data);
    free(stringArray);
}

// Release every cell string, every row, the row table and finally the struct.
void freeStringMatrix(StringMatrix *stringMatrix)
{
    const int rowCount = stringMatrix->numberOfStringArray;
    const int columnCount = stringMatrix->numberOfString;

    for (int row = 0; row < rowCount; row++)
    {
        char **cells = stringMatrix->data[row];
        for (int column = 0; column < columnCount; column++)
            free(cells[column]);
        free(cells);
    }
    free(stringMatrix->data);
    free(stringMatrix);
}

// Load a CSV file as a matrix of floats.
//
// filename              path of the CSV file.
// delimiterLine         separator between lines (for example "\n").
// delimiterElement      separator between the cells of a line (for example ",").
// printFileContentBool  forwarded to readCSV_toString(); 1 dumps the raw bytes.
// m_output              receives the number of columns.
// n_output              receives the number of rows.
//
// Prints the file size to stdout. Returns a row-major array of n * m floats
// that the caller must free(), or NULL (with both outputs set to 0) when the
// file cannot be read or memory runs out.
float *openCSV_float(char *filename, char *delimiterLine, char *delimiterElement, int printFileContentBool, int *m_output, int *n_output)
{
    float *array1D = NULL;
    StringArray *stringArray = NULL;
    StringMatrix *stringMatrix = NULL;
    unsigned long fileSize = 0;
    int lineSize = 0;

    *m_output = 0;
    *n_output = 0;

    char *fileContent = readCSV_toString(filename, printFileContentBool, &fileSize);
    if (fileContent == NULL)
        return NULL;
    printf("Filesize: %lu Bytes\n\n", fileSize);

    // No line can be longer than the whole file, so this size never truncates
    // a line. The round trip rejects files too large for the int parameter.
    lineSize = (int)(fileSize + 1);
    if (lineSize <= 0 || (unsigned long)lineSize != fileSize + 1)
        goto cleanup;

    stringArray = splitString(fileContent, delimiterLine, lineSize);
    if (stringArray == NULL)
        goto cleanup;

    stringMatrix = getStringMatrix(stringArray, delimiterElement, MAX_LENGTH_OF_STRING_ELEMENT);
    if (stringMatrix == NULL)
        goto cleanup;

    array1D = stringToFloat_matrix(stringMatrix, m_output, n_output);
    if (array1D == NULL)
    {
        *m_output = 0;
        *n_output = 0;
    }

cleanup:
    // Every intermediate representation is released on success and on failure.
    free(fileContent);
    if (stringArray != NULL)
        freeStringArray(stringArray);
    if (stringMatrix != NULL)
        freeStringMatrix(stringMatrix);

    return array1D;
}

// Print a row-major matrix of n rows and m columns, one bracketed row per line.
void printMatrix_float(float *array1D, int m, int n)
{
    const float *cell = array1D; // advances through the rows in storage order

    for (int row = 0; row < n; row++)
    {
        printf("[ ");
        for (int column = 0; column < m; column++)
        {
            printf("%f ", *cell);
            cell++;
        }
        printf("]\n");
    }
}

// Defined further down; declared with its real parameter list so that calls
// are type checked.
void useFloatMatrix(int m, int n);

// Demo entry point: load data.csv and print it as a matrix.
// Returns 0 on success and EXIT_FAILURE when the file cannot be loaded.
int main(void)
{
    printf("\n\n------------START OF PROGRAM------------\n");

    int n = 0, m = 0;
    int printFileContentBool = 0;
    float *array1D;

    char delimiterLine[2] = {NEW_LINE_CHARARACTER, NULL_TERMINATOR};
    char delimiterElement[3] = ","; //extra byte of space to show it can work
    char filename[100] = "data.csv";
    array1D = openCSV_float(filename, delimiterLine, delimiterElement, printFileContentBool, &m, &n);
    if (array1D == NULL)
    {
        fprintf(stderr, "Could not load '%s'\n", filename);
        return EXIT_FAILURE;
    }

    printf("CSV_content =\n");
    printMatrix_float(array1D, m, n);

    // The matrix belongs to the caller of openCSV_float().
    free(array1D);

    printf("\n------------END OF PROGRAM------------");
    return 0;
}

// Matrix header and its elements in a single allocation (flexible array member).
typedef struct
{
    int m; // presumably the number of columns — NOTE(review): confirm, the field is never assigned in the visible code
    int n; // presumably the number of rows — same caveat
    float data[]; // elements; useFloatMatrix() views them as rows of m floats

} FloatMatrix; // Could use it but I don't end up using it.

void useFloatMatrix(int m, int n)
{

    FloatMatrix *mat = alloca(sizeof(int) * 2 + m * n * sizeof(float));
    // alloca: allocate on stack. freed at end of function (when $sp_current > $sp_created {stack goes down} )

    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
        {
            ((float(*)[m])(mat->data))[i][j] = 10 * (i + 1) + j; // does it still do 2 pointer derefence?
        }
    }

    float(*matrixGoodType)[m] = ((float(*)[m])(mat->data));

    printf("\nDoing tests inside use float matrix \n");
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
        {
            printf("%f ", matrixGoodType[i][j]);
        }
        printf("\n");
    }

    // all data from mat is on the stack. It should be consecutive in location;
}

0
投票

更好的代码:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>

// Nominal limit on the number of lines (tokens) handled per split.
const int MAX_NUMBER_OF_LINE = 128;
// Nominal limit on the length of one line, in bytes.
// NOTE(review): a const int is not a constant expression in C, so an array
// declared with this size is a variable-length array.
const int MAX_LINE_SIZE = 256;
// Size, in bytes, that openCSV_float() passes to getStringMatrix() as maxElementSize.
const int MAX_LENGTH_OF_STRING_ELEMENT = 11; // for floats. Increase for double 0.1234567\0 = 10 character.
// The C string terminator, named for readability.
const char NULL_TERMINATOR = '\0';
// Line separator; main() builds the line-delimiter string from it.
// NOTE(review): the identifier is misspelled ("CHARARACTER").
const char NEW_LINE_CHARARACTER = '\n';

// A list of separately allocated C strings (released by freeStringArray()).
typedef struct
{
    int numberOfString; // number of entries in data
    char **data;        // stringArray->data[i] is a string.
} StringArray;

// A table of strings: data[i][j] is cell j of row i (released by freeStringMatrix()).
typedef struct
{
    char ***data;            // data[i] is one row, an array of numberOfString strings
    int numberOfStringArray; // number of rows
    int numberOfString; // number of strings (columns) in every row
} StringMatrix;

// Read a whole file into a freshly allocated, NUL-terminated buffer.
//
// filename         path of the file; it is opened in binary mode.
// printFileBool    when 1, every byte of the buffer (terminator included) is
//                  printed to stdout as a decimal number followed by a space.
// fileSize_output  receives the number of bytes read, or 0 on failure.
//
// Returns the buffer (the caller must free() it), or NULL when the file cannot
// be opened, measured or read, or when memory runs out. Diagnostics go to stderr.
char *readCSV_toString(char *filename, int printFileBool, unsigned long *fileSize_output)
{
    char *fileContent = NULL;
    long endPosition = 0;
    unsigned long fileSize = 0;

    if (fileSize_output != NULL)
        *fileSize_output = 0; // defined value on every failure path

    FILE *filePointer = fopen(filename, "rb");
    if (filePointer == NULL)
    {
        fprintf(stderr, "Can't open file '%s'\n", filename);
        return NULL;
    }

    // ftell() reports failure as -1, so keep the value signed until checked.
    if (fseek(filePointer, 0L, SEEK_END) != 0)
        goto read_failed;
    endPosition = ftell(filePointer);
    if (endPosition < 0)
        goto read_failed;
    rewind(filePointer);

    fileSize = (unsigned long)endPosition;
    fileContent = malloc((size_t)fileSize + 1); // +1 for the terminator
    if (fileContent == NULL)
    {
        fprintf(stderr, "Can't allocate memory for file %s\n", filename);
        fclose(filePointer); // do not leak the stream on this path
        return NULL;
    }

    // A short read would leave uninitialised bytes in front of the terminator.
    if (fread(fileContent, 1, (size_t)fileSize, filePointer) != (size_t)fileSize)
        goto read_failed;
    fileContent[fileSize] = '\0';

    if (printFileBool == 1)
    {
        for (unsigned long i = 0; i <= fileSize; i++)
        {
            printf("%i ", fileContent[i]);
        }
    }

    fclose(filePointer);
    if (fileSize_output != NULL)
        *fileSize_output = fileSize;
    return fileContent;

read_failed:
    fprintf(stderr, "Can't read file '%s'\n", filename);
    free(fileContent);
    fclose(filePointer);
    return NULL;
}

// Compare the slice stringInput[start_index, end_index_excluded) with
// stringToBeCompared, as strcmp() would compare a copy of the slice.
//
// Returns 0 when they are equal, a negative value when the slice sorts first
// and a positive value when it sorts last. An empty or inverted range is
// treated as the empty string. Nothing is allocated and the comparison never
// reads past the terminator of either string.
int substringCompare(char *stringInput, int start_index, int end_index_excluded, char *stringToBeCompared)
{
    int sliceLength = end_index_excluded - start_index;
    if (sliceLength <= 0)
        return stringToBeCompared[0] == '\0' ? 0 : -1;

    int difference = strncmp(stringInput + start_index, stringToBeCompared, (size_t)sliceLength);
    if (difference != 0)
        return difference;

    // The first sliceLength characters agree (or both strings ended earlier).
    // The slice is then smaller only if the other string continues beyond it.
    return strlen(stringToBeCompared) > (size_t)sliceLength ? -1 : 0;
}

// Count the pieces that `string` falls into when cut at every occurrence of
// the (possibly multi-character) substring `delimiter`.
//
// string      NUL-terminated text to inspect (not modified).
// stringSize  number of leading characters that may be inspected; a delimiter
//             must lie completely inside them. Scanning also stops at the
//             terminator, so a bound larger than the string is harmless.
// delimiter   separator substring; an empty delimiter never matches.
//
// A delimiter immediately followed by the end of the string does not start a
// new piece. Returns the number of delimiters counted plus one.
int string_splitLength(char *string, unsigned long stringSize, char *delimiter)
{
    int length = 0;
    size_t delimiterLength = strlen(delimiter);
    if (delimiterLength == 0)
        return 1;

    // Written as an addition so the bound cannot wrap around when stringSize
    // is smaller than the delimiter.
    for (unsigned long i = 0; i + delimiterLength <= stringSize && string[i] != '\0'; i++)
    {
        // strncmp() stops at the terminator, and a match guarantees that
        // string[i + delimiterLength] is still inside the string.
        if (strncmp(string + i, delimiter, delimiterLength) == 0 && string[i + delimiterLength] != '\0')
        {
            length++;
        }
    }

    return length + 1; // because there (a,b,c,d): 3 commas, 4 number. Hence +1
}

// Split `string` into tokens, like string.split(delimiter).
//
// string          NUL-terminated text to split; it is not modified.
// delimiter       set of separator characters (same meaning as for strtok():
//                 every character is a separator, runs of separators count
//                 once, leading and trailing separators are ignored).
// maxElementSize  size limit, terminator included, of each stored token;
//                 longer tokens are truncated to maxElementSize - 1 characters.
//
// Returns a new StringArray whose numberOfString is exactly the number of
// tokens found (0 with data == NULL when there are none). Every string and the
// array itself are heap allocated; release them with freeStringArray().
// Returns NULL on invalid arguments or when memory runs out.
StringArray *string_split(char *string, char *delimiter, int maxElementSize)
{
    if (string == NULL || delimiter == NULL || maxElementSize <= 0)
        return NULL;

    // First pass: count the tokens with the same rule used to extract them,
    // so the pointer array can never be too small.
    int tokenCount = 0;
    const char *scan = string + strspn(string, delimiter);
    while (*scan != '\0')
    {
        tokenCount++;
        scan += strcspn(scan, delimiter);
        scan += strspn(scan, delimiter);
    }

    StringArray *stringArray = calloc(1, sizeof *stringArray); // the big thing
    if (stringArray == NULL)
        return NULL;
    if (tokenCount == 0)
        return stringArray;

    stringArray->data = calloc((size_t)tokenCount, sizeof *stringArray->data); // The array of pointers.
    if (stringArray->data == NULL)
    {
        free(stringArray);
        return NULL;
    }

    // Second pass: copy each token into its own terminated buffer.
    const size_t maxCharacters = (size_t)maxElementSize - 1;
    const char *cursor = string + strspn(string, delimiter);
    for (int i = 0; i < tokenCount; i++)
    {
        size_t tokenLength = strcspn(cursor, delimiter);
        size_t copyLength = tokenLength < maxCharacters ? tokenLength : maxCharacters;

        char *element = malloc(copyLength + 1);
        if (element == NULL)
        {
            for (int k = 0; k < i; k++)
                free(stringArray->data[k]);
            free(stringArray->data);
            free(stringArray);
            return NULL;
        }
        memcpy(element, cursor, copyLength);
        element[copyLength] = '\0';

        stringArray->data[i] = element;
        stringArray->numberOfString = i + 1;

        cursor += tokenLength;
        cursor += strspn(cursor, delimiter);
    }

    return stringArray;
}

// Print the entry count, then the array on one line as [ 'a', 'b', 'c' ].
void printStringArray(StringArray *stringArray)
{
    const int count = stringArray->numberOfString;
    int index = 0;

    printf("The string array with length %d is: \n", count);
    printf("[ ");

    while (index < stringArray->numberOfString)
    {
        printf("'%s'", stringArray->data[index]);
        index++;
        // A comma follows every entry except the last one.
        if (index < count)
            printf(",");
        printf(" ");
    }
    printf("]\n");
}

// Release a StringArray returned by string_split() together with every string
// it still owns (entries already moved elsewhere must have been set to NULL).
static void discardSplitRow(StringArray *cells)
{
    if (cells == NULL)
        return;
    if (cells->data != NULL)
    {
        for (int j = 0; j < cells->numberOfString; j++)
            free(cells->data[j]);
        free(cells->data);
    }
    free(cells);
}

// Build a table of cell strings from an array of lines.
//
// stringArray_input  one string per line of the CSV file.
// delimiterElement   separator between the cells of a line.
// maxElementSize     kept for interface compatibility. Rows are split with a
//                    bound derived from their own length, so no cell is truncated.
//
// The column count is taken from the first row. Cells missing from a shorter
// row become empty strings and surplus cells of a longer row are dropped, so
// every data[i][j] with j < numberOfString is a valid heap string.
//
// Returns a matrix to be released with freeStringMatrix(), or NULL on invalid
// arguments or when memory runs out.
StringMatrix *getStringMatrix(StringArray *stringArray_input, char *delimiterElement, int maxElementSize)
{
    (void)maxElementSize;

    if (stringArray_input == NULL || delimiterElement == NULL)
        return NULL;

    StringMatrix *stringMatrix = calloc(1, sizeof *stringMatrix);
    if (stringMatrix == NULL)
        return NULL;

    int rowCount = stringArray_input->numberOfString;
    if (rowCount <= 0 || stringArray_input->data == NULL)
        return stringMatrix; // empty matrix: 0 rows, 0 columns, data == NULL

    stringMatrix->data = calloc((size_t)rowCount, sizeof *stringMatrix->data);
    if (stringMatrix->data == NULL)
    {
        free(stringMatrix);
        return NULL;
    }
    stringMatrix->numberOfStringArray = rowCount;

    int columnCount = 0;
    for (int i = 0; i < rowCount; i++)
    {
        char *row = stringArray_input->data[i];
        StringArray *cells = NULL;

        // Only split rows that contain at least one non-delimiter character.
        if (row != NULL && row[strspn(row, delimiterElement)] != '\0')
        {
            // A cell cannot be longer than its row, so this size never truncates.
            cells = string_split(row, delimiterElement, (int)strlen(row) + 1);
            if (cells == NULL)
                goto fail;
        }

        if (i == 0)
        {
            columnCount = (cells != NULL && cells->data != NULL) ? cells->numberOfString : 0;
            stringMatrix->numberOfString = columnCount;
        }

        stringMatrix->data[i] = calloc(columnCount > 0 ? (size_t)columnCount : 1, sizeof *stringMatrix->data[i]);
        if (stringMatrix->data[i] == NULL)
        {
            discardSplitRow(cells);
            goto fail;
        }

        for (int j = 0; j < columnCount; j++)
        {
            char *cell = NULL;
            if (cells != NULL && cells->data != NULL && j < cells->numberOfString)
            {
                cell = cells->data[j]; // take ownership of the string
                cells->data[j] = NULL; // so discardSplitRow() will not free it
            }
            if (cell == NULL)
                cell = calloc(1, sizeof *cell); // missing cell -> empty string
            if (cell == NULL)
            {
                discardSplitRow(cells);
                goto fail;
            }
            stringMatrix->data[i][j] = cell;
        }

        // Frees the temporary struct, its pointer array and any surplus cells.
        discardSplitRow(cells);
    }
    return stringMatrix;

fail:
    for (int i = 0; i < rowCount; i++)
    {
        if (stringMatrix->data[i] == NULL)
            continue;
        for (int j = 0; j < columnCount; j++)
            free(stringMatrix->data[i][j]);
        free(stringMatrix->data[i]);
    }
    free(stringMatrix->data);
    free(stringMatrix);
    return NULL;
}

// Print the matrix dimensions (columns, then rows) followed by one bracketed
// line of quoted cells per row and a closing blank line.
void printStringMatrix(StringMatrix *stringMatrix)
{
    const int columnCount = stringMatrix->numberOfString;
    const int rowCount = stringMatrix->numberOfStringArray;

    printf("The string matrix is: %d %d\n", columnCount, rowCount);

    int row = 0;
    while (row < rowCount)
    {
        printf("[ ");
        int column = 0;
        while (column < columnCount)
        {
            printf("'%s' ", stringMatrix->data[row][column]);
            column++;
        }
        printf("]\n");
        row++;
    }
    printf("\n");
}

// Convert every cell of a string matrix to float.
//
// stringMatrix  table of numeric strings; a NULL cell is read as 0.
// m_output      receives the number of columns (numberOfString).
// n_output      receives the number of rows (numberOfStringArray).
//
// Returns a heap array of n * m floats in row-major order (element (i, j) is at
// index i * m + j) that the caller must free(), or NULL when memory runs out.
// Cells that do not start with a number convert to 0, as atof() does.
float *stringToFloat_matrix(StringMatrix *stringMatrix, int *m_output, int *n_output)
{
    int n = stringMatrix->numberOfStringArray; // vertical (list of list)
    int m = stringMatrix->numberOfString;      // horizontal (last layer)

    *m_output = m;
    *n_output = n;

    size_t rowCount = n > 0 ? (size_t)n : 0;
    size_t columnCount = m > 0 ? (size_t)m : 0;

    // Refuse sizes whose byte count would not fit in size_t.
    if (columnCount != 0 && rowCount > (size_t)-1 / sizeof(float) / columnCount)
        return NULL;

    // Allocate at least one element so that NULL always means failure.
    size_t cellCount = rowCount * columnCount;
    float *array1D = calloc(cellCount > 0 ? cellCount : 1, sizeof *array1D);
    if (array1D == NULL)
        return NULL;

    for (size_t i = 0; i < rowCount; i++)
    {
        for (size_t j = 0; j < columnCount; j++)
        {
            const char *cell = stringMatrix->data[i][j];
            if (cell != NULL)
                array1D[i * columnCount + j] = (float)atof(cell);
        }
    }

    return array1D;
}

// Release every string of the array, then the pointer array, then the struct.
void freeStringArray(StringArray *stringArray)
{
    int remaining = stringArray->numberOfString;
    char **entry = stringArray->data;

    // Walk the pointer array front to back, freeing one string per step.
    while (remaining-- > 0)
    {
        free(*entry);
        entry++;
    }
    free(stringArray->data);
    free(stringArray);
}

// Release every cell string, every row, the row table and finally the struct.
void freeStringMatrix(StringMatrix *stringMatrix)
{
    const int rowCount = stringMatrix->numberOfStringArray;
    const int columnCount = stringMatrix->numberOfString;

    for (int row = 0; row < rowCount; row++)
    {
        char **cells = stringMatrix->data[row];
        for (int column = 0; column < columnCount; column++)
            free(cells[column]);
        free(cells);
    }
    free(stringMatrix->data);
    free(stringMatrix);
}

// Load a CSV file as a matrix of floats.
//
// filename              path of the CSV file.
// delimiterLine         separator between lines (for example "\n").
// delimiterElement      separator between the cells of a line (for example "||").
// printFileContentBool  forwarded to readCSV_toString(); 1 dumps the raw bytes.
// m_output              receives the number of columns.
// n_output              receives the number of rows.
//
// Prints the file size, the list of lines and the table of cells to stdout.
// Returns a row-major array of n * m floats that the caller must free().
// This function never returns NULL: on any failure it releases what it holds
// and terminates the program with exit(1).
float *openCSV_float(char *filename, char *delimiterLine, char *delimiterElement, int printFileContentBool, int *m_output, int *n_output)
{
    float *array1D = NULL;
    StringArray *stringArray = NULL;
    StringMatrix *stringMatrix = NULL;
    unsigned long fileSize = 0;
    int lineSize = 0;

    char *fileContent = readCSV_toString(filename, printFileContentBool, &fileSize);
    if (fileContent == NULL)
        exit(1);
    printf("Filesize: %lu Bytes\n\n", fileSize);

    // No line can be longer than the whole file, so this size never truncates
    // a line. The round trip rejects files too large for the int parameter.
    lineSize = (int)(fileSize + 1);
    if (lineSize <= 0 || (unsigned long)lineSize != fileSize + 1)
        goto cleanup;

    stringArray = string_split(fileContent, delimiterLine, lineSize);
    if (stringArray == NULL)
        goto cleanup;
    printStringArray(stringArray);
    printf("-------\n");

    stringMatrix = getStringMatrix(stringArray, delimiterElement, MAX_LENGTH_OF_STRING_ELEMENT);
    if (stringMatrix == NULL)
        goto cleanup;
    printStringMatrix(stringMatrix);

    array1D = stringToFloat_matrix(stringMatrix, m_output, n_output);

cleanup:
    // Every intermediate representation is released on success and on failure.
    free(fileContent);
    if (stringArray != NULL)
        freeStringArray(stringArray);
    if (stringMatrix != NULL)
        freeStringMatrix(stringMatrix);

    if (array1D == NULL)
    {
        fprintf(stderr, "Can't convert file '%s' to a float matrix\n", filename);
        exit(1);
    }
    return array1D;
}

// Print a row-major matrix of n rows and m columns, one bracketed row per line.
void printMatrix_float(float *array1D, int m, int n)
{
    const float *cell = array1D; // advances through the rows in storage order

    for (int row = 0; row < n; row++)
    {
        printf("[ ");
        for (int column = 0; column < m; column++)
        {
            printf("%f ", *cell);
            cell++;
        }
        printf("]\n");
    }
}

// Demo entry point: load data2.csv (cells separated by "||") and print it as
// a matrix. Returns 0 on success and EXIT_FAILURE when nothing was loaded.
int main(void)
{
    printf("\n\n------------START OF PROGRAM------------\n");

    int n = 0, m = 0;
    int printFileContentBool = 0;
    float *array1D;

    char delimiterLine[2] = {NEW_LINE_CHARARACTER, NULL_TERMINATOR};
    char delimiterElement[] = "||";
    char filename[100] = "data2.csv";

    array1D = openCSV_float(filename, delimiterLine, delimiterElement, printFileContentBool, &m, &n);
    if (array1D == NULL)
    {
        fprintf(stderr, "Could not load '%s'\n", filename);
        return EXIT_FAILURE;
    }

    printf("CSV_content =\n");
    printMatrix_float(array1D, m, n);

    // The matrix belongs to the caller of openCSV_float().
    free(array1D);

    printf("\n------------END OF PROGRAM------------");
    return 0;
}

data2.csv

0.1||0.2||0.3||0.4||0.5
1.1||1.2||1.3||1.4||1.5
2.1||2.2||2.3||2.4||2.5
3.1||3.2||3.3||3.3||3.5

输出


------------START OF PROGRAM------------
Filesize: 95 Bytes

The string array with length 4 is:
[ '0.1||0.2||0.3||0.4||0.5', '1.1||1.2||1.3||1.4||1.5', '2.1||2.2||2.3||2.4||2.5', '3.1||3.2||3.3||3.3||3.5' ]
-------
The string matrix is: 5 4
[ '0.1' '0.2' '0.3' '0.4' '0.5' ]
[ '1.1' '1.2' '1.3' '1.4' '1.5' ]
[ '2.1' '2.2' '2.3' '2.4' '2.5' ]
[ '3.1' '3.2' '3.3' '3.3' '3.5' ]

CSV_content =
[ 0.100000 0.200000 0.300000 0.400000 0.500000 ]
[ 1.100000 1.200000 1.300000 1.400000 1.500000 ]
[ 2.100000 2.200000 2.300000 2.400000 2.500000 ]
[ 3.100000 3.200000 3.300000 3.300000 3.500000 ]

------------END OF PROGRAM------------
© www.soinside.com 2019 - 2024. All rights reserved.