如何正确读取c中文件中的某些字符串?

问题描述 投票:0回答:1

我试图用 C 编写一个比较字符串的程序。字符串成对出现,文件顶部给出了字符串对的数量。该文件具有如下形式:

2
a: 01010100000101011111
   01001010100000001111
   00000000000011110000
b: 00000111110000010001
   10101010100111110001
a: 00000011111111111100
   00111111111111000
b: 00000001111001010101

我的问题是正确读取字符串以执行比较等

这是我的代码:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

#define NCHAR 32

/*
 * Question fragment: for each of `pairs` records, read the words that make
 * up string "a" and string "b" with readWord(), join the pieces with
 * concat(), and write the joined strings to fpw.
 *
 * NOTE(review): this excerpt is incomplete. `pairs`, `aline`, `bline`,
 * `amaxsize`, `bmaxsize`, `fpw` and `concat` are not declared anywhere in
 * the visible code, and `fp` is still NULL when it is first passed to
 * readWord() -- presumably the code that opens the files and reads the
 * pair count was omitted.
 *
 * NOTE(review): readWord() allocates a fresh buffer on every call and
 * returns NULL at end of file when nothing was read. Every reassignment of
 * word1 below therefore drops the previous buffer without freeing it, and
 * a NULL return would be handed straight to strcmp().
 */
int main (int argc, char **argv) {
    char *word1 = NULL;
    FILE *fp = NULL;
    for (int i = 0; i<pairs; i++){

        /* Only the first record still has its "a:" label unread; for later
         * records the label was consumed by the previous iteration's
         * "b" loop. */
        if (i == 0)
        {
            word1 = readWord(fp, &word1);// read the "a:" label
            /* skip empty words produced by consecutive delimiters */
            while(strcmp(word1, "") == 0) word1 = readWord(fp, &word1);
        }

        word1 = readWord(fp, &word1);// read the first piece of string a
        while(strcmp(word1, "") == 0) word1 = readWord(fp, &word1);

        /* NOTE(review): strncpy does not nul-terminate aline when word1 is
         * amaxsize characters or longer. */
        aline = malloc(amaxsize);
        strncpy(aline, word1, amaxsize);

        word1 = readWord(fp, &word1); 
        while(strcmp(word1, "") == 0) word1 = readWord(fp, &word1);

        /* Append continuation pieces until the "b:" label is reached. */
        while (strcmp(word1, "b:")!=0){
            aline = concat(aline, word1);

            word1 = readWord(fp, &word1); 
            while(strcmp(word1, "") == 0) word1 = readWord(fp, &word1);
        }

        fprintf(fpw, "a: %s\n", aline); // write the joined string a to the output file
        free (word1);
        word1 = NULL;

        word1 = readWord(fp, &word1); // read the first piece of string b (after "b:")
        while(strcmp(word1, "") == 0) word1 = readWord(fp, &word1);
        bline = malloc(bmaxsize);
        strncpy(bline, word1, bmaxsize);

        word1 = readWord(fp, &word1); 
        while(strcmp(word1, "") == 0) word1 = readWord(fp, &word1);

        /* The last record has no following "a:" label to stop on. */
        if (i == (pairs-1))
        {

            /* NOTE(review): this loop stops at the first empty word, so a
             * continuation line that begins with spaces ends it early; at
             * end of file readWord() returns NULL, which strcmp() must not
             * receive. */
            while (strcmp(word1, "")!=0){
                bline = concat(bline, word1);
                word1 = readWord(fp, &word1);

            }
        }
        else 
        {
            /* Append continuation pieces until the next record's "a:" label. */
            while (strcmp(word1, "a:")!=0){
                bline = concat(bline, word1);
                word1 = readWord(fp, &word1);
                while(strcmp(word1, "") == 0) word1 = readWord(fp, &word1);
            }
        }
        fprintf(fpw, "b: %s\n", bline); // write the joined string b to the output file
        free (word1);
        word1 = NULL;

        fprintf(fpw,"\n");
/* NOTE(review): the brace below closes the for loop; main's own closing
 * brace does not appear in this excerpt. */
}

    /*
     * Read one delimiter-terminated word from fp into a newly allocated,
     * nul-terminated buffer.
     *
     * A word ends at the first '\n', '\t', ' ' or EOF; the delimiter is
     * consumed and not stored. A delimiter read first yields an empty
     * string "".
     *
     * fp     - stream to read from.
     * buffer - out parameter; receives the same pointer that is returned.
     *          Whatever it pointed to before the call is overwritten
     *          without being freed.
     *
     * Returns the allocated word (caller frees), or NULL when the initial
     * allocation fails or when EOF is hit before any character was stored.
     * If growing the buffer fails, the characters collected so far are
     * returned as a terminated string.
     */
    char *readWord(FILE *fp, char **buffer)
    {
        int capacity = NCHAR;   /* bytes currently allocated */
        int len = 0;            /* characters stored so far */
        int c;
        char *word = malloc (capacity);

        *buffer = word;
        if (word == NULL) {
            fprintf (stderr, "Error...\n");
            return NULL;
        }

        for (;;) {
            c = fgetc (fp);
            if (c == EOF || c == '\n' || c == '\t' || c == ' ')
                break;          /* delimiter or end of input ends the word */

            word[len++] = c;

            /* Double the buffer while there is no spare byte left for
             * another character plus the terminator. */
            if (len + 1 >= capacity) {
                char *grown = realloc (word, capacity * 2);

                if (grown == NULL) {
                    /* Out of memory: hand back what was collected. */
                    word[len] = 0;
                    return word;
                }
                word = grown;
                *buffer = word;
                capacity *= 2;
            }
        }
        word[len] = 0;          /* nul-terminate */

        /* Nothing stored and the stream is exhausted: report "no word". */
        if (len == 0 && c == EOF) {
            free (word);
            *buffer = NULL;
            return NULL;
        }

        return word;
    }

readWord 函数每次读取一个单词。我想要做的是逐词读取文件并把这些词连接起来,得到完整的字符串 a,并将其保存在 aline 中以便后续处理;b 也是一样。问题是文件没有被正确读取,例如,我得到的不是第一对中完整的 a,而只是它的第一部分。有什么想法吗?

c string file text-files readfile
1个回答
0
投票

从该文件中读取数据并不简单,但可以用相当简单的方式处理:设置一个标志来记录是否已经遇到过 'a' 或 'b',跳过所有空白字符和 ':' 字符,将其余所有字符存入缓冲区,并根据需要重新分配内存;当遇到第二个 'a' 或 'b' 时,用 ungetc 将该字符放回 FILE* 流中,然后给缓冲区加上空字符终止符并返回。

听起来很容易 - 对吧?嗯,这就是它。让我们来看看你的readword()函数需要什么。

首先,由于您是在 readword() 内部分配 buffer 的,因此无需将 char **buffer 作为参数传递。您已经将 readword 声明为 char *readword(...),因此只需将 FILE* 指针作为参数传递,并返回指向已分配、已填充且以空字符结尾的缓冲区的指针。

您可以按照自己喜欢的方式处理重新分配方案。您可以从分配的合理数量的字符开始,然后加倍(或添加一些倍数)当前大小,或者每次用完时添加固定数量。下面的示例只是以32-char缓冲区开始,然后每次需要重新分配时再添加32个字符。 (如果数据大小真的未知,我可能会从32个字符开始,然后每次用完时加倍 - 完全取决于你)。

使用 ctype.h 中的 isspace() 函数可确保正确处理所有空白字符。

最后几个问题只是确保在缓冲区中返回一个以空字符结尾的字符串,并确保在调用realloc时,在每个新内存块中重新初始化指向缓冲区末尾的指针。

完全放在一起,你可以做类似以下的事情。在readword()函数之后添加一个简单的示例程序来读取您的示例文件并输出从该文件读取的组合字符串,

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

#define NCHR  32

/*
 * Read one logical record from fp and return its data characters as a
 * newly allocated, nul-terminated string.
 *
 * A record starts at a label character ('a' or 'b') and runs until the
 * next label character or end of file. All whitespace and ':' characters
 * are skipped, so data split over several lines is returned as a single
 * joined string. The label that ends the record is pushed back onto the
 * stream with ungetc() so the next call starts on it.
 *
 * fp - stream positioned at (or before) the start of a record.
 *
 * Returns the allocated string, which the caller must free(), or NULL when
 *   - memory cannot be allocated or grown (a message is printed with
 *     perror and any partial buffer is freed), or
 *   - end of file is reached before any label or data character was read,
 *     i.e. there is no further record.
 */
char *readword (FILE *fp)
{
    int c,                      /* current character */
        firstline = 0;          /* flag for 'a' or 'b' found at 1st char */
    size_t n = 0, nchr = NCHR;  /* chars read, number of chars allocated */
    char *buffer = NULL, *p;    /* buffer to fill, pointer to buffer */

    buffer = malloc (nchr);             /* allocate initial NCHR */
    if (!buffer) {                      /* validate */
        perror ("malloc-buffer");
        return NULL;
    }
    p = buffer;                         /* set pointer to buffer */

    while ((c = fgetc (fp)) != EOF) {   /* read each char */
        if (isspace (c) || c == ':')    /* skip all whitespace and ':' */
            continue;
        if (c == 'a' || c == 'b') {     /* begins with 'a' or 'b' */
            if (firstline) {            /* already had a/b line */
                ungetc (c, fp);         /* put the char back */
                *p = 0;                 /* nul-terminate */
                return buffer;          /* return filled buffer */
            }
            firstline = 1;              /* set firstline flag */
            continue;
        }
        if (n == nchr - 2) {            /* check if realloc needed */
            void *tmp = realloc (buffer, nchr + NCHR);
            if (!tmp) {                 /* validate */
                /* Fail the same way as the initial allocation: report,
                 * release what we hold, and let the caller decide. */
                perror ("realloc-buffer");
                free (buffer);
                return NULL;
            }
            buffer = tmp;               /* assign new block to buffer */
            p = buffer + n;             /* realloc may move the block */
            nchr += NCHR;               /* update no. chars allocated */
        }
        *p++ = (char) c;    /* assign the current char and advance p */
        n++;                /* increment your character count */
    }

    /* EOF with no label and no data: there is no record left to return. */
    if (!firstline && n == 0) {
        free (buffer);
        return NULL;
    }
    *p = 0;         /* nul-terminate */

    return buffer;
}

/*
 * Example driver: read the pair count from the first line of the input,
 * then read that many a/b records with readword() and print each one as
 * "a: <string>" / "b: <string>", with a blank line after every pair.
 *
 * The input is the file named by the first command-line argument, or
 * stdin when no argument is given.
 *
 * Returns 0 on success, 1 if the input cannot be opened, the first line
 * cannot be read, or the first line is not a non-negative integer.
 */
int main (int argc, char **argv) {

    char buf[NCHR], *word;
    int npairs = 0,     /* pairs still to be read */
        toggle = 0,     /* 0 while expecting 'a', 1 while expecting 'b' */
        status = 0;     /* process exit status */
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    if (!fgets (buf, sizeof buf, fp)) {
        fputs ("error: read of line 1 failed.\n", stderr);
        status = 1;
    }
    else if (sscanf (buf, "%d", &npairs) != 1 || npairs < 0) {
        /* a negative count is not a valid header */
        fputs ("error: invalid file format.\n", stderr);
        status = 1;
    }

    /* Count down whole pairs rather than doubling the count into a word
     * total, so a huge header value cannot overflow. On any error above
     * npairs is not positive and the loop is skipped. */
    while (npairs > 0 && (word = readword (fp))) {
        printf ("%c: %s\n", toggle ? 'b' : 'a', word);
        free (word);
        if (toggle) {
            putchar ('\n');
            toggle = 0;
            npairs--;       /* both halves of this pair are done */
        }
        else
            toggle = 1;
    }

    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    return status;
}

(注意:上面的 toggle 只是一个取值为 1/0 的标志,用于在相应行的开头输出 "a:" 或 "b:",并在读取到的每对行之间添加一个 '\n'。)

示例使用/输出

$ ./bin/read_multiline_pairs dat/pairsbinline.txt
a: 010101000001010111110100101010000000111100000000000011110000
b: 0000011111000001000110101010100111110001

a: 0000001111111111110000111111111111000
b: 00000001111001010101

内存使用/错误检查

始终在动态分配存储时验证内存使用情况,并确保已释放所分配的所有内存。

$ valgrind ./bin/read_multiline_pairs dat/pairsbinline.txt
==14257== Memcheck, a memory error detector
==14257== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==14257== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info
==14257== Command: ./bin/read_multiline_pairs dat/pairsbinline.txt
==14257==
a: 010101000001010111110100101010000000111100000000000011110000
b: 0000011111000001000110101010100111110001

a: 0000001111111111110000111111111111000
b: 00000001111001010101

==14257==
==14257== HEAP SUMMARY:
==14257==     in use at exit: 0 bytes in 0 blocks
==14257==   total heap usage: 8 allocs, 8 frees, 872 bytes allocated
==14257==
==14257== All heap blocks were freed -- no leaks are possible
==14257==
==14257== For counts of detected and suppressed errors, rerun with: -v
==14257== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

请仔细看一看,如果有疑问请告诉我。这个问题最主要的部分是处理每一对中所有行的读取和连接,其余的编码就留给你了。

© www.soinside.com 2019 - 2024. All rights reserved.