大家下午好!我用 C 语言编写了 cat 实用程序的实现,但遇到了一些问题。这是我的代码:
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void parse_args(int argc, char *argv[], int *b_flag, int *e_flag, int *n_flag,
int *s_flag, int *t_flag, int *a_flag);
FILE *open_file(const char *filename, char *argv[]);
void process_file(FILE *file, int b_flag, int e_flag, int n_flag, int s_flag,
int t_flag, int a_flag);
int process_s_flag(char *buffer, int *last_empty_line, int s_flag);
void process_b_flag(char *buffer, int b_flag, int *line_number);
void process_n_flag(char *buffer, int n_flag, int s_flag, int b_flag,
int *line_number, int is_squeezable);
void process_e_flag(char *buffer, int e_flag, int a_flag);
void process_t_flag(char *buffer, int t_flag, int a_flag);
int main(int argc, char *argv[]) {
int b_flag = 0, e_flag = 0, n_flag = 0, s_flag = 0, t_flag = 0, a_flag = 0;
parse_args(argc, argv, &b_flag, &e_flag, &n_flag, &s_flag, &t_flag, &a_flag);
for (int i = optind; i < argc; i++) {
FILE *file = open_file(argv[i], argv);
process_file(file, b_flag, e_flag, n_flag, s_flag, t_flag, a_flag);
fclose(file);
}
return 0;
}
void parse_args(int argc, char *argv[], int *b_flag, int *e_flag, int *n_flag,
int *s_flag, int *t_flag, int *a_flag) {
int flags;
const char *short_options = "beEnstTA";
const struct option long_options[] = {
{"number-nonblank", no_argument, b_flag, 1},
{"show-all", no_argument, a_flag, 1},
{"show-ends", no_argument, e_flag, 1},
{"number", no_argument, n_flag, 1},
{"squeeze-blank", no_argument, s_flag, 1},
{"show-tabs", no_argument, t_flag, 1},
{0, 0, 0, 0}};
while ((flags = getopt_long(argc, argv, short_options, long_options, NULL)) !=
-1) {
switch (flags) {
case 0:
break;
case 'b':
(*b_flag)++;
break;
case 'e':
case 'E':
(*e_flag)++;
break;
case 'n':
(*n_flag)++;
break;
case 't':
case 'T':
(*t_flag)++;
break;
case 's':
(*s_flag)++;
break;
case 'A':
(*a_flag)++;
break;
default:
fprintf(stderr, "Usage: %s [OPTIONS] [FILE]...\n", argv[0]);
exit(EXIT_FAILURE);
}
}
if (argc == optind) {
fprintf(stderr, "Usage: %s [OPTIONS] [FILE]...\n", argv[0]);
exit(EXIT_FAILURE);
}
}
FILE *open_file(const char *filename, char *argv[]) {
FILE *file = fopen(filename, "r");
if (file == NULL) {
fprintf(stderr, "%s: %s: No such file or directory\n", argv[0], filename);
exit(EXIT_FAILURE);
}
return file;
}
void process_file(FILE *file, int b_flag, int e_flag, int n_flag, int s_flag,
int t_flag, int a_flag) {
char *buffer = NULL;
size_t buffer_size = 0;
int last_empty_line = 0;
int line_number = 1;
while ((getline(&buffer, &buffer_size, file)) != -1) {
int is_squeezable = process_s_flag(buffer, &last_empty_line, s_flag);
if (!is_squeezable || !s_flag) {
process_t_flag(buffer, t_flag, a_flag);
process_b_flag(buffer, b_flag, &line_number);
process_n_flag(buffer, n_flag, s_flag, b_flag, &line_number,
is_squeezable);
process_e_flag(buffer, e_flag, a_flag);
fprintf(stdout, "%s", buffer);
}
}
free(buffer);
}
int process_s_flag(char *buffer, int *last_empty_line, int s_flag) {
int length = strlen(buffer);
int is_empty_line = (length <= 1);
int marker = 0;
if (s_flag && is_empty_line) {
if (*last_empty_line) {
marker = 1;
}
*last_empty_line = 1;
} else {
*last_empty_line = 0;
}
return marker;
}
void process_b_flag(char *buffer, int b_flag, int *line_number) {
int length = strlen(buffer);
if (b_flag && length > 1) {
char *tmp = strdup(buffer);
buffer[0] = '\0';
sprintf(buffer, "%6d\t", (*line_number)++);
strcat(buffer, tmp);
free(tmp);
}
}
void process_n_flag(char *buffer, int n_flag, int s_flag, int b_flag,
int *line_number, int is_squeezable) {
if (n_flag) {
if (!s_flag || !is_squeezable) {
if (!b_flag) {
char *tmp = strdup(buffer);
buffer[0] = '\0';
sprintf(buffer, "%6d\t", (*line_number)++);
strcat(buffer, tmp);
free(tmp);
}
}
}
}
void process_e_flag(char *buffer, int e_flag, int a_flag) {
int length = strlen(buffer);
if (e_flag || a_flag) {
if (length > 0 && buffer[length - 1] == '\n') {
buffer[length - 1] = '\0';
sprintf(buffer + length - 1, "%s", "$\n");
} else {
sprintf(buffer + length, "%s", "$");
}
}
}
void process_t_flag(char *buffer, int t_flag, int a_flag) {
int length = strlen(buffer);
if (t_flag || a_flag) {
for (int i = 0; i < length; i++) {
if (buffer[i] == '\t') {
memmove(buffer + i + 2, buffer + i + 1, length - i);
buffer[i] = '^';
buffer[i + 1] = 'I';
length++;
i++;
}
}
}
}
如您所见,我使用 getline 从 strcat 文件中读取行,以根据标志更改它们。通过测试时,显示错误:
=119== Memcheck, a memory error detector
==119== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==119== Using Valgrind-3.19.0 and LibVEX; rerun with -h for copyright info
==119==
==119== Invalid write of size 1
==119== at 0x48A790D: strcat (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119== by 0x109998: process_b_flag (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== by 0x109815: process_file (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== Address 0x48b15ec is 0 bytes after a block of size 44 alloc'd
==119== at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119== by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119== by 0x65F79F75BB8B0087: ???
==119== by 0x1FFEFFD4FF: ???
==119== by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==
==119== Invalid write of size 1
==119== at 0x48A7917: strcat (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119== by 0x109998: process_b_flag (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== by 0x109815: process_file (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== Address 0x48b15f1 is 5 bytes after a block of size 44 alloc'd
==119== at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119== by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119== by 0x65F79F75BB8B0087: ???
==119== by 0x1FFEFFD4FF: ???
==119== by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==
==119== Invalid read of size 1
==119== at 0x48A7B1B: strlen (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119== by 0x109A5F: process_e_flag (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== by 0x10984C: process_file (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== Address 0x48b15ec is 0 bytes after a block of size 44 alloc'd
==119== at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119== by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119== by 0x65F79F75BB8B0087: ???
==119== by 0x1FFEFFD4FF: ???
==119== by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==
==119== Invalid read of size 1
==119== at 0x48A7B1B: strlen (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119== by 0x4048D72: fputs (in /lib/ld-musl-x86_64.so.1)
==119== by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119== Address 0x48b15ec is 0 bytes after a block of size 44 alloc'd
==119== at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119== by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119== by 0x65F79F75BB8B0087: ???
==119== by 0x1FFEFFD4FF: ???
==119== by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==
==119== Invalid read of size 1
==119== at 0x40496E4: ??? (in /lib/ld-musl-x86_64.so.1)
==119== Address 0x48b15f0 is 4 bytes after a block of size 44 alloc'd
==119== at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119== by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119== by 0x65F79F75BB8B0087: ???
==119== by 0x1FFEFFD4FF: ???
==119== by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==
==119== Syscall param writev(vector[...]) points to unaddressable byte(s)
==119== at 0x4047995: ??? (in /lib/ld-musl-x86_64.so.1)
==119== Address 0x48b15ec is 0 bytes after a block of size 44 alloc'd
==119== at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119== by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119== by 0x65F79F75BB8B0087: ???
==119== by 0x1FFEFFD4FF: ???
==119== by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==
==119== Invalid read of size 1
==119== at 0x40516A8: ??? (in /lib/ld-musl-x86_64.so.1)
==119== by 0x4049706: ??? (in /lib/ld-musl-x86_64.so.1)
==119== Address 0x48b1809 is 0 bytes after a block of size 73 alloc'd
==119== at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119== by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119== by 0x65F79F75BB8B0087: ???
==119== by 0x1FFEFFD4FF: ???
==119== by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==
==119== Invalid read of size 1
==119== at 0x40516A5: ??? (in /lib/ld-musl-x86_64.so.1)
==119== by 0x4049706: ??? (in /lib/ld-musl-x86_64.so.1)
==119== Address 0x48b180a is 1 bytes after a block of size 73 alloc'd
==119== at 0x48A6FC9: realloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==119== by 0x4049B2C: getdelim (in /lib/ld-musl-x86_64.so.1)
==119== by 0x65F79F75BB8B0087: ???
==119== by 0x1FFEFFD4FF: ???
==119== by 0x109392: main (in /builds/pipelines/test/to/src/cat/s21_cat)
==119==
==119== Syscall param writev(vector[...]) points to uninitialised byte(s)
==119== at 0x4047995: ??? (in /lib/ld-musl-x86_64.so.1)
==119== by 0x17C: ???
==119== Address 0x409578b is in the BSS segment of /lib/ld-musl-x86_64.so.1
==119==
==119==
==119== HEAP SUMMARY:
==119== in use at exit: 0 bytes in 0 blocks
==119== total heap usage: 13 allocs, 13 frees, 2,013 bytes allocated
==119==
==119== All heap blocks were freed -- no leaks are possible
==119==
==119== Use --track-origins=yes to see where uninitialised values come from
==119== For lists of detected and suppressed errors, rerun with: -s
==119== ERROR SUMMARY: 103 errors from 9 contexts (suppressed: 0 from 0)
我明白为什么会发生这种情况,除了文本本身之外,我还尝试在缓冲区中放入其他字符,例如 line_number。所以我决定在我的函数中使用 realloc:
void process_b_flag(char **buffer, int b_flag, int *line_number) {
if (b_flag && *buffer != NULL) {
size_t length = strlen(*buffer);
if (length > 1) {
char *tmp = strdup(*buffer);
(*buffer)[0] = '\0';
sprintf(*buffer, "%6d\t", (*line_number)++);
strcat(*buffer, tmp);
if (strlen(*buffer) + 1 > length) {
char *new_buffer = realloc(*buffer, length + 100);
if (new_buffer != NULL) {
*buffer = new_buffer;
} else {
// Обработка ошибки realloc
free(tmp);
fprintf(stderr, "Memory reallocation error\n");
exit(EXIT_FAILURE);
}
}
free(tmp);
}
}
}
不幸的是,这种方法由于某种原因不起作用。我不明白为什么,但由于某种原因,“length”变量反映了前一行的长度,而不是当前行的长度。因此,如果前一行和当前行中的字符数存在较大差异,则会发出错误“下一个大小无效”。有人可以向我解释为什么会发生这种情况以及如何解决它吗?
一些用于操作通过程序的文本的函数可以或确实尝试增加行中的字符数。当缓冲区已满时,此类尝试会溢出缓冲区,从而产生未定义的行为。您提供的 Valgrind 报告显示了发生这种情况的一些实例。
例如,在
process_b_flag()
中,您有:
char *tmp = strdup(buffer); buffer[0] = '\0'; sprintf(buffer, "%6d\t", (*line_number)++); strcat(buffer, tmp);
这要求
buffer
在第一个空终止符之后至少有 7 个字节的分配容量,并且在不检查的情况下依赖它是不安全的。
同样,在
process_e_flag()
中,这个...
if (length > 0 && buffer[length - 1] == '\n') { buffer[length - 1] = '\0'; sprintf(buffer + length - 1, "%s", "$\n"); } else { sprintf(buffer + length, "%s", "$"); }
...要求缓冲区中字符串终止符之后至少有一个字节的未使用空间,如果没有,则会溢出缓冲区。
这些和任何类似的溢出直接解释了 Valgrind 的几个“无效写入”报告,并且它们间接解释了至少一些“无效读取”报告。
您可以考虑输出您想要的任何行修饰,而不修改
getline()
读取的缓冲区内容,或者至少不需要延长它。例如,当您想要行号时,只需输出它们,而不将它们插入到缓冲区中。
但是,如果您希望或需要在行处理期间扩大缓冲区,那么它将完全适合您的程序,遵循
getline()
的模型:接收指向缓冲区指针的指针和指向其当前大小的指针。如果您发现需要更多空间,请重新分配,更新缓冲区指针和大小。