  1. 启动子进程
  2. 分别从stdout和stderr读取
context.WithTimeout(context.Background(), 10*time.Second)

尽管这种方法在大多数情况下都有效,但我们发现了一些会永远挂起的情况。子进程的某些行为导致了这种死锁(与未能和子进程充分分离的孙进程有关,它们使子进程永远无法完全退出)。


func GetOutputsWithTimeout(command string, args []string, timeout int) (io.ReadCloser, io.ReadCloser, int, error) { start := time.Now() procLogger.Tracef("Initializing %s %+v", command, args) cmd := exec.Command(command, args...) // get pipes to standard output/error stdout, err := cmd.StdoutPipe() if err != nil { return emptyReader(), emptyReader(), -1, fmt.Errorf("cmd.StdoutPipe() error: %+v", err.Error()) } stderr, err := cmd.StderrPipe() if err != nil { return emptyReader(), emptyReader(), -1, fmt.Errorf("cmd.StderrPipe() error: %+v", err.Error()) } // setup buffers to capture standard output and standard error var buf bytes.Buffer var ebuf bytes.Buffer // create a channel to capture any errors from wait done := make(chan error) // create a semaphore to indicate when both pipes are closed var wg sync.WaitGroup wg.Add(2) go func() { if _, err := buf.ReadFrom(stdout); err != nil { procLogger.Debugf("%s: Error Slurping stdout: %+v", command, err) } wg.Done() }() go func() { if _, err := ebuf.ReadFrom(stderr); err != nil { procLogger.Debugf("%s: Error Slurping stderr: %+v", command, err) } wg.Done() }() // start process procLogger.Debugf("Starting %s", command) if err := cmd.Start(); err != nil { procLogger.Errorf("%s: failed to start: %+v", command, err) return emptyReader(), emptyReader(), -1, fmt.Errorf("cmd.Start() error: %+v", err.Error()) } go func() { procLogger.Debugf("Waiting for %s (%d) to finish", command, cmd.Process.Pid) err := cmd.Wait() // this can be 'forced' by the killing of the process procLogger.Tracef("%s finished: errStatus=%+v", command, err) // err could be nil here //notify select of completion, and the status done <- err }() // Wait for timeout or completion. 
select { // Timed out case <-time.After(time.Duration(timeout) * time.Second): elapsed := time.Since(start) procLogger.Errorf("%s: timeout after %.1f\n", command, elapsed.Seconds()) if err := TerminateTree(cmd); err != nil { return ioutil.NopCloser(&buf), ioutil.NopCloser(&ebuf), -1, fmt.Errorf("failed to kill %s, pid=%d: %+v", command, cmd.Process.Pid, err) } wg.Wait() // this *should* take care of waiting for stdout and stderr to be collected after we killed the process return ioutil.NopCloser(&buf), ioutil.NopCloser(&ebuf), -1, fmt.Errorf("%s: timeout %d s reached, pid=%d process killed", command, timeout, cmd.Process.Pid) //Exited normally or with a non-zero exit code case err := <-done: wg.Wait() // this *should* take care of waiting for stdout and stderr to be collected after the process terminated naturally. elapsed := time.Since(start) procLogger.Tracef("%s: Done after %.1f\n", command, elapsed.Seconds()) rc := -1 // Note that we have to use go1.10 compatible mechanism. if err != nil { procLogger.Tracef("%s exited with error: %+v", command, err) exitErr, ok := err.(*exec.ExitError) if ok { ws := exitErr.Sys().(syscall.WaitStatus) rc = ws.ExitStatus() } procLogger.Debugf("%s exited with status %d", command, rc) return ioutil.NopCloser(&buf), ioutil.NopCloser(&ebuf), rc, fmt.Errorf("%s: process done with error: %+v", command, err) } else { ws := cmd.ProcessState.Sys().(syscall.WaitStatus) rc = ws.ExitStatus() } procLogger.Debugf("%s exited with status %d", command, rc) return ioutil.NopCloser(&buf), ioutil.NopCloser(&ebuf), rc, nil } //NOTREACHED: should not reach this line! }

这在大部分时间都能正常工作。但有时它不返回任何输出,好像读取 stdout 的 goroutine 不够快,没能“捕获”所有输出(或者提前退出了?)。“大部分时间”这一点强烈暗示存在竞争条件。



那么输出为什么会丢失?两个负责读取的 goroutine 与 cmd.Start() 之间是否仍然存在竞争条件?我们是否应该再使用一个 WaitGroup 来确保这两个 goroutine 已经在运行?




