在简单的 shell 程序中实现命令管道的问题

问题描述 投票:0回答:1
我正在用 C 编写一个非常简单的类似 bash 的 shell,目前正在实现命令之间的管道(即 command1 | command2,它应该同时运行这两个命令,并通过管道把第一个命令的 stdout 连接到第二个命令的 stdin)。

我已经到了这样的地步

shell> echo test | cat | cat
能够正确地打印出“test”,但任何比这更复杂的命令都无法成功。例如:

shell> ls -1 / | sort | rev
(据我所知)在管道方面与前一个相同,但这个失败了,另一个成功了。

我完全不知道为什么会这样,因为我已经彻底调试了主进程和子进程,并验证了这些进程在工作和不工作命令中都使用正确的连接启动。

这是代码的简化版本:

// Uncomment to use hardcoded input
// #define USE_HARDCODED_INPUT

#include <stdlib.h>
#include <string.h>
#include <stddef.h>     // NULL
#include <errno.h>      // ENOENT
#include <stdio.h>      // setbuf, printf
#include <unistd.h>     // exec, fork
#include <fcntl.h>      // open
#include <sys/types.h>  // wait
#include <sys/wait.h>

// Redirects the standard streams of the calling process.
// For each non-zero argument, the descriptor is duplicated onto the
// corresponding standard stream and the original descriptor is closed.
// A value of 0 is treated as "leave this stream alone".
// Return values of dup2() and close() are not checked.
void set_process_FDs(int input, int output, int error)
{
    if (input)
    {
        dup2(input, STDIN_FILENO);
        close(input);
    }
    if (output)
    {
        dup2(output, STDOUT_FILENO);
        close(output);
    }
    if (error)
    {
        dup2(error, STDERR_FILENO);
        close(error);
    }
}

// Runs in the child: applies the requested redirections and replaces the
// process image with argv[0]. If execvp() returns (i.e. it failed), an error
// is printed and the child exits with status 1, so this function never
// returns to its caller.
void child_setup(char **argv, int input, int output, int error)
{
    if (input || output || error)
        set_process_FDs(input, output, error);

    execvp(argv[0], argv);
    perror("exec()");
    exit(1);
}

// Forks a child that executes argv with the given descriptors as its
// stdin/stdout/stderr (0 meaning "not redirected").
// Returns 1 in the parent on success, 0 if fork() failed.
// Only the descriptors passed here are closed in the child (by
// set_process_FDs); any other descriptor the parent has open at fork()
// time stays open in the child.
int launch_process(char **argv, int is_last, int input, int output, int error)
{
    int status;
    pid_t pid = fork();

    switch(pid)
    {
        case -1:
            perror("fork()");
            return 0;
        case 0:
            child_setup(argv, input, output, error);
            return 0; // not reached: child_setup() either execs or exits
        default:
            break;
    }

    // A single wait() call is made after the last command is launched.
    // NOTE(review): wait() returns as soon as any one child terminates, so
    // this does not necessarily wait for the last command, nor for all of them.
    if (is_last)
        wait(&status);

    return 1;
}

// Launches every command of a NULL-terminated pipeline (argvv), connecting
// the stdout of command i to the stdin of command i+1 through a pipe.
// Returns the number of commands that were launched (0 if the first one
// could not be launched).
int run_commands(char ***argvv)
{
    int no_commands_ran = 0;
    int argc;
    char **argv = argvv[0];
    int in_pipe[2];   // pipe feeding the current command (output of the previous one)
    int out_pipe[2];  // pipe receiving the output of the current command

    for (int i=0; (argv = argvv[i]); ++i)
    {
        // A pipe is created for every command; the return value is not checked.
        pipe(out_pipe);

        // First command: stdin is not redirected. Only in_pipe[0] is set here;
        // in_pipe[1] is left uninitialized until the end of the first iteration.
        if (i == 0)
            in_pipe[0] = 0;

        // Last command: the freshly created pipe is not needed, so both ends are
        // closed and the output is marked as "not redirected".
        if (!argvv[i+1])
        {
            close(out_pipe[0]);
            close(out_pipe[1]);
            out_pipe[1] = 0;
        }

        // Counts the arguments of the current command; argc is not used afterwards.
        for (argc=0; argv[argc]; ++argc);

        // The child receives only the read end of in_pipe and the write end of
        // out_pipe; in_pipe[1] and out_pipe[0] are still open in the parent at
        // this point and are therefore inherited by the child as well.
        if (!launch_process(argv, !argvv[i+1], in_pipe[0], out_pipe[1], 0))
            break;

        // The parent closes the pipe that fed the command it just launched.
        if (i != 0)
        {
            close(in_pipe[0]);
            close(in_pipe[1]);
        }

        // The output pipe of this command becomes the input pipe of the next one.
        in_pipe[0] = out_pipe[0];
        in_pipe[1] = out_pipe[1];

        no_commands_ran = i + 1;
    }

    return no_commands_ran;
}

extern int obtain_order(); // Obtains an order from stdin

// Shell main loop. Without USE_HARDCODED_INPUT it prompts, reads a command
// line through obtain_order() and runs it; with USE_HARDCODED_INPUT it runs
// two fixed pipelines once and leaves the loop.
int main(void)
{
    char ***argvv = NULL;
    int argvc;
    char *filev[3] = {NULL, NULL, NULL};
    int bg;
    int ret;

    setbuf(stdout, NULL); // Unbuffered
    setbuf(stdin, NULL);

    while (1)
    {
#ifndef USE_HARDCODED_INPUT
        printf("%s", "shell> "); // Prompt
        ret = obtain_order(&argvv, filev, &bg);
        if (ret == 0) // EOF
        {
            fprintf(stderr, "EOF\n");
            break;
        }
        if (ret == -1)
            continue; // Syntax error
        argvc = ret - 1; // Line
        if (argvc == 0)
            continue; // Empty line
        if (!run_commands(argvv))
            continue; // Error executing command
#else
        argvc = 3;

        // Pipeline 1: echo test | cat | cat
        // calloc() zero-fills, which provides the NULL terminators of each vector.
        char ***argvv1 = calloc(4, sizeof(char*));
        argvv1[0] = calloc(3, sizeof(char*));
        argvv1[0][0] = strdup("echo");
        argvv1[0][1] = strdup("test");
        argvv1[1] = calloc(2, sizeof(char*));
        argvv1[1][0] = strdup("cat");
        argvv1[2] = calloc(2, sizeof(char*));
        argvv1[2][0] = strdup("cat");

        // Pipeline 2: ls -1 / | sort | rev
        char ***argvv2 = calloc(4, sizeof(char*));
        argvv2[0] = calloc(4, sizeof(char*));
        argvv2[0][0] = strdup("ls");
        argvv2[0][1] = strdup("-1");
        argvv2[0][2] = strdup("/");
        argvv2[1] = calloc(4, sizeof(char*));
        argvv2[1][0] = strdup("sort");
        argvv2[2] = calloc(4, sizeof(char*));
        argvv2[2][0] = strdup("rev");

        printf("%s", "shell> echo test | cat | cat\n");
        if (!run_commands(argvv1))
            continue; // Error executing command

        usleep(500);

        printf("%s", "shell> ls -1 / | sort | rev\n");
        if (!run_commands(argvv2))
            continue; // Error executing command

        printf("%s", "\nNo more hardcoded commands to run\n");
        break;
#endif
    }

    return 0;
}
obtain_order() 是位于解析器中的函数,这是一个简单的 Yacc 解析器。它只是用 shell 中输入的内容填充名为 argvv 的 argvs 向量。如果有人想尝试代码并查看问题,只需在开头取消注释 #define 即可查看手动键入有问题的命令所得到的行为。

c shell pipe fork exec
1个回答
1
投票
首先,您的父进程不会等待其所有子进程完成执行。

wait

 的调用确实发生在最后一个子进程生成之后

if (is_last) wait(&status);
但它等待的不一定是最后一个子进程。也就是说,只要任意一个子进程执行完成(或者发生错误),wait 就会返回。

正确等待所有子进程完成,在

run_commands

结束时,

/* ... */ /* reap children */ pid_t pid; int status; while ((pid = wait(&status)) > 0) if (WIFEXITED(status)) fprintf(stderr, "LOG: Child<%ld> process exited with status<%d>\n", (long) pid, WEXITSTATUS(status)); return no_commands_ran;
这就暴露出第一个之后的子进程都处于挂起(hanging)状态这一事实,因为 wait 阻塞了父进程的执行。

(放置几个fprintf

语句后。此处的█表示程序正在阻塞。)

shell> echo test | cat | cat LOG: Child<30607> (echo) LOG: Child<30608> (cat) LOG: Child<30609> (cat) LOG: Child<30607> process exited with status <0> █
如果不等待所有子进程,就会产生孤儿进程。


至于为什么这些进程无法终止,这是因为某些文件描述符没有被关闭。

调用

launch_process


launch_process(argv, !argvv[i+1], in_pipe[0], out_pipe[1], 0)
确保 in_pipe[0] 和 out_pipe[1] 在子进程中被关闭,但 in_pipe[1] 和 out_pipe[0] 中任何有效的文件描述符都会泄漏到子进程中。由于这些泄漏的文件描述符在子进程中仍然处于打开状态,相关管道的写入端就始终没有全部关闭,读取方永远收不到 EOF,因此进程会一直阻塞,等待更多数据到达。

最快的解决方法是更改

launch_process

 以接受两个管道

int launch_process(char **argv, int is_last, int input[2], int output[2], int error);
传入两个管道

if (!launch_process(argv, !argvv[i+1], in_pipe, out_pipe, 0))
关闭多余的文件描述符

case 0: close(input[1]); close(output[0]); child_setup(argv, input[0], output[1], error); return 0;
删除

if (is_last) wait(&status);
并将之前显示的 

wait

 循环添加到 
run_commands
 的末尾。

这是程序工作版本的完整示例,只需最少的重构。

使用

-DDEBUG

 进行编译以获得一些额外的睡眠时间,以便发现文件描述符泄漏(不应该有任何泄漏)。请阅读
main
中的详细注释。

#define _POSIX_C_SOURCE 200809L
#define USE_HARDCODED_INPUT
#define DEBUG_SLEEP_TIME 20

#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

/*
 * Sentinel meaning "no descriptor / stream not redirected".
 * The value 0 is kept for compatibility with the existing interface of
 * set_process_FDs(). Limitation: 0 is also a valid descriptor number, so this
 * scheme assumes the shell itself is started with stdin open.
 */
enum { NO_FD = 0 };

/* Duplicates fd onto target and closes fd; does nothing when fd is NO_FD. */
static void redirect_fd(int fd, int target)
{
    if (fd == NO_FD)
        return;

    if (dup2(fd, target) == -1)
        perror("dup2()");
    close(fd);
}

/* Closes both ends of a pipe that are in use and marks them as unused. */
static void close_pipe(int fds[2])
{
    for (int k = 0; k < 2; ++k) {
        if (fds[k] != NO_FD) {
            close(fds[k]);
            fds[k] = NO_FD;
        }
    }
}

/* calloc() that terminates the program instead of returning NULL. */
static void *xcalloc(size_t count, size_t size)
{
    void *mem = calloc(count, size);

    if (!mem) {
        perror("calloc()");
        exit(EXIT_FAILURE);
    }
    return mem;
}

/*
 * Redirects the standard streams of the calling process.
 * Each argument is either NO_FD (0, stream left untouched) or a descriptor
 * that is duplicated onto stdin/stdout/stderr respectively and then closed.
 */
void set_process_FDs(int input, int output, int error)
{
    redirect_fd(input, STDIN_FILENO);
    redirect_fd(output, STDOUT_FILENO);
    redirect_fd(error, STDERR_FILENO);
}

/*
 * Child-side setup: applies the redirections and executes argv.
 * Never returns: either execvp() succeeds or the child exits with
 * EXIT_FAILURE after reporting the error.
 */
void child_setup(char **argv, int input, int output, int error)
{
    if (input || output || error)
        set_process_FDs(input, output, error);

#ifdef DEBUG
    /* a sleep here should allow time to inspect
     * `/proc/$PID/fd` for FD leaks, see `main` for details
     * if the child process hangs you will have ample time, regardless
     */
    sleep(DEBUG_SLEEP_TIME);
#endif

    execvp(argv[0], argv);
    perror("exec()");
    exit(EXIT_FAILURE);
}

/*
 * Forks a child that runs argv.
 *
 * input  - pipe feeding the child: input[0] becomes its stdin, input[1] is
 *          closed in the child. Unused ends must be NO_FD.
 * output - pipe fed by the child: output[1] becomes its stdout, output[0] is
 *          closed in the child. Unused ends must be NO_FD.
 * error  - descriptor to use as stderr, or NO_FD.
 * is_last is accepted for interface compatibility and is not used.
 *
 * Returns 1 in the parent on success, 0 if fork() failed.
 */
int launch_process(char **argv, int is_last, int input[2], int output[2], int error)
{
    pid_t pid = fork();

    (void) is_last;

    switch (pid) {
    case -1:
        perror("fork()");
        return 0;
    case 0:
        fprintf(stderr, "LOG: Child<%ld> (%s)\n", (long) getpid(), *argv);
        /* Drop the pipe ends this child must not hold, otherwise the
         * reader on the other side never sees end-of-file. Unused ends
         * are skipped so that descriptor 0 (stdin) is never closed by
         * accident. */
        if (input[1] != NO_FD)
            close(input[1]);
        if (output[0] != NO_FD)
            close(output[0]);
        child_setup(argv, input[0], output[1], error);
        return 0; /* not reached: child_setup() execs or exits */
    default:
        break;
    }

    return 1;
}

/*
 * Runs a NULL-terminated pipeline of commands, connecting the stdout of each
 * command to the stdin of the next one, then waits for every child.
 *
 * Returns the number of commands that were launched; 0 means the first
 * command could not be started.
 */
int run_commands(char ***argvv)
{
    int no_commands_ran = 0;
    int in_pipe[2] = { NO_FD, NO_FD };

    for (int i = 0; argvv[i]; ++i) {
        int is_last = !argvv[i + 1];
        int out_pipe[2] = { NO_FD, NO_FD };

        /* Only commands that have a successor need an output pipe. */
        if (!is_last && pipe(out_pipe) == -1) {
            perror("pipe()");
            break;
        }

        int launched = launch_process(argvv[i], is_last, in_pipe, out_pipe, 0);

        /* Whether or not the launch worked, the parent has no further
         * use for the pipe that was meant to feed this command. */
        close_pipe(in_pipe);

        if (!launched) {
            close_pipe(out_pipe);
            break;
        }

        in_pipe[0] = out_pipe[0];
        in_pipe[1] = out_pipe[1];
        no_commands_ran = i + 1;
    }

    /* Make sure the parent holds no pipe end before blocking in wait();
     * a descriptor left open here would keep earlier children from
     * terminating. This is a no-op after a fully successful loop. */
    close_pipe(in_pipe);

    /* reap children */
    pid_t pid;
    int status;

    while ((pid = wait(&status)) > 0)
        if (WIFEXITED(status))
            fprintf(stderr, "LOG: Child<%ld> process exited with status<%d>\n",
                    (long) pid, WEXITSTATUS(status));

    return no_commands_ran;
}

int main(void)
{
    fprintf(stderr, "LOG: Parent ID: <%ld>\n", (long) getpid());

#ifdef USE_HARDCODED_INPUT
    /* Zero-filled allocations provide the NULL terminator of every vector. */
    char ***argvv1 = xcalloc(4, sizeof *argvv1);
    argvv1[0] = xcalloc(3, sizeof *argvv1[0]);
    argvv1[0][0] = "echo";
    argvv1[0][1] = "test";
    argvv1[1] = xcalloc(2, sizeof *argvv1[1]);
    argvv1[1][0] = "cat";
    argvv1[2] = xcalloc(2, sizeof *argvv1[2]);
    argvv1[2][0] = "cat";

    char ***argvv2 = xcalloc(4, sizeof *argvv2);
    argvv2[0] = xcalloc(4, sizeof *argvv2[0]);
    argvv2[0][0] = "ls";
    argvv2[0][1] = "-1";
    argvv2[0][2] = "/";
    argvv2[1] = xcalloc(2, sizeof *argvv2[1]);
    argvv2[1][0] = "sort";
    argvv2[2] = xcalloc(2, sizeof *argvv2[2]);
    argvv2[2][0] = "rev";

    puts("shell> echo test | cat | cat");
    if (!run_commands(argvv1))
        return EXIT_FAILURE;

    /* usleep is deprecated */
    nanosleep(&(struct timespec) { .tv_nsec = 500000L }, NULL);

    puts("shell> ls -1 / | sort | rev");
    if (!run_commands(argvv2))
        return EXIT_FAILURE;

    puts("No more hardcoded commands to run");
#endif

#ifdef DEBUG
    /* compile with -DDEBUG
     * placing a sleep here to provide time to discover
     * any file descriptor leaks
     * inspect `ls -l /proc/$PID/fd`
     * only the standard stream fds should exist (0, 1, 2) at
     * either debug sleep
     * see child_setup as well
     */
    sleep(DEBUG_SLEEP_TIME);
#endif

    return EXIT_SUCCESS;
}


这是一个建立一系列管道和进程的粗略的、带注释的示例。它的工作方式与您的示例类似,可能有助于进一步展示文件描述符必须打开、复制和关闭的顺序。

#define _POSIX_C_SOURCE 200809L

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/* A descriptor (or a syscall result) is considered valid when non-negative. */
int valid(int fd)
{
    return fd >= 0;
}

/* these safe_* functions are a non-operation when passed negative values */

/* Closes fd if it is valid, reporting a failure of close(). */
void safe_close(int fd)
{
    if (valid(fd) && !valid(close(fd)))
        perror("close");
}

/* Duplicates old onto new if both are valid, reporting a failure of dup2(). */
void safe_dup2(int old, int new)
{
    if (valid(old) && valid(new) && !valid(dup2(old, new)))
        perror("dup2");
}

/*
 * Runs `length` commands as a pipeline: the stdout of args[i] is connected
 * to the stdin of args[i + 1]. Each args[i] is a NULL-terminated argument
 * vector. All children that were started are reaped before returning.
 * If a pipe or a process cannot be created, the error is reported and no
 * further commands are started.
 */
void execute(char *args[][8], size_t length)
{
    int channel[2] = { -1, -1 };

    for (size_t i = 0; i < length; i++) {
        /* get previous reader in parent */
        int from = channel[0];

        /* close previous writer in parent */
        safe_close(channel[1]);
        channel[0] = channel[1] = -1;

        /* create current-writer-to-next-reader pipe;
         * the last command writes to the inherited stdout, so it needs none */
        if (i + 1 < length && !valid(pipe(channel))) {
            perror("pipe");
            safe_close(from);
            break;
        }

        /* -1 for the last command: stdout is left untouched */
        int to = channel[1];

        pid_t child = fork();

        if (child < 0) {
            /* without this check a failed fork would be silently
             * treated as the parent path and the command skipped */
            perror("fork");
            safe_close(from);
            break;
        }

        if (child == 0) {
            /* duplicate previous reader to stdin in child */
            safe_dup2(from, fileno(stdin));
            /* close previous reader in child */
            safe_close(from);
            /* close next reader in current child */
            safe_close(channel[0]);
            /* duplicate current writer to stdout in child */
            safe_dup2(to, fileno(stdout));
            /* close current writer in child */
            safe_close(channel[1]);

            execvp(args[i][0], args[i]);
            perror("exec");
            exit(EXIT_FAILURE);
        }

        /* close previous reader in parent */
        safe_close(from);
    }

    /* close final pipe in parent (only open here after an early exit) */
    safe_close(channel[0]);
    safe_close(channel[1]);

    /* reap children */
    pid_t pid;
    int status;

    while ((pid = wait(&status)) > 0)
        if (WIFEXITED(status))
            fprintf(stderr, "LOG: Child<%ld> process exited with status<%d>\n",
                    (long) pid, WEXITSTATUS(status));
}

int main(void)
{
    char *argv[][8] = {
        { "echo", "test", NULL },
        { "cat", NULL },
        { "cat", "-n", NULL }
    };

    execute(argv, 3);

    char *argv2[][8] = {
        { "ls", "-1", "/", NULL },
        { "sort", NULL },
        { "rev", NULL }
    };

    execute(argv2, 3);
}


Aside:作为边缘情况,0

 是有效的文件描述符。 
set_process_FDs
的缺陷在于,如果
STDIN_FILENO
关闭,并且随后获取了新的文件描述符,则该描述符可能为零。这样一来,if (output) 和 if (error) 可能不会按预期运行。

© www.soinside.com 2019 - 2024. All rights reserved.