全局指针对信号处理程序显示为空

问题描述 投票:0回答:1

我有一些启动进程的 C 代码。对于每个进程,它都会在结构链接列表中注册一些元数据。然后,SIGCHLD处理程序更新所述元数据。结构是这样的:

// block/unblock utilities
static sigset_t block () {
  sigset_t sigchld_mask, old_mask;
  sigemptyset(&sigchld_mask);
  sigaddset(&sigchld_mask, SIGCHLD);

  if(sigprocmask(SIG_BLOCK, &sigchld_mask, &old_mask))
    exit_with_error();
  
  return old_mask;
}

static void unblock (sigset_t* old_mask)  {
  if(sigprocmask(SIG_SETMASK, old_mask, NULL)) exit_with_error();
}


typedef struct ChildProcess {
  pid_t pid;
  int* status;
} ChildProcess;

// ProcessNode is a linked list node containing process metadata
typedef struct ProcessNode {
  ChildProcess* proc;
  volatile struct ProcessNode* next;
} ProcessNode;

volatile ProcessNode * head = NULL;

int launch_process ( ... ) {
  block();            // block SIGCHLD
  ...                 // launch process with posix_spawn()
  register_proc(...); // register all metadata
  unblock();          // unblock SIGCHLD
}

void register_proc (pid_t pid, int* status) {
  ChildProcess* child = (ChildProcess*)malloc(sizeof(ChildProcess));
  child->pid = pid;
  child->status = status;
  volatile ProcessNode* node = (ProcessNode*)malloc(sizeof(ProcessNode));
  new_node->proc = child;
  new_node->next = head;
  head = new_node;
  return 0;
}

int update_proc (pid_t pid, int status) {
  volatile ProcessNode* cursor = head;
  while(cursor != NULL && curr->proc->pid != pid) {
    cursor = cursor->next;
  }
  if(cursor != NULL) {
    *(cursor->proc->status) = status;
  }
}

void sigchld_handler (int sig) {
  int status;
  pid_t pid;
  while((pid = waitpid(-1, &status, WNOHANG))) {
    update_proc(pid, status);
  }
}

// API function for checking the status pointer
// if we are to wait for the process to terminate, we pselect until sigchld eventually updates the status pointer
int get_state (int* status, bool wait_for_termination) {
  sigset_t old_mask = block(); // block SIGCHLD, store old mask
  bool state_unknown = true;
  bool done = false;
  while(state_unknown) {
    if(WIFEXITED(*status) || WIFSIGNALED(*status)) {
      done = true;
    }
    state_unknown = false;
    if(wait_for_termination && done) {
      if(pselect(0, NULL, NULL, NULL, NULL, &old_mask)) {
        if(errno == EINTR) {
          state_unknown = true; // check status again on interrupt
        }
      }
    }
    else if(!wait_for_termination) {
      state_unknown = false;
    }
  }
  unblock(); // unblock SIGCHLD
  return 0;
}

int main () {
  // ... allocate some memory for other tasks

  //Setup SIGCHLD handler
  sigset_t sigchld_mask;
  sigemptyset(&sigchld_mask);
  sigaddset(&sigchld_mask, SIGCHLD);
  struct sigaction sa;
  sa.sa_handler = sigchld_handler;
  sa.sa_mask = sigchld_mask;
  sa.sa_flags = SA_RESTART;
  sigaction(SIGCHLD, &sa, NULL);
}

有时这可行,但有时会失败,因为

update_status
无法找到匹配的
ProcessNode
。发生这种情况是因为当
head
执行时
NULL
sigchld_handler
——即使它之前和之后都是非空的。我省略了很多细节,但每次写入或读取
SIGCHLD
时我都会阻止
head
,而且它是不稳定的。我认为这两件事足以应对任何竞争条件。

对于可能出错的事情还有其他想法吗?

c linux process signals sigchld
1个回答
0
投票

通过正确的初始化,您应该为每个终止的进程获得一个

SIGCHLD

因此,您不应该在处理程序中的 waitpid

 上循环。

考虑:

您解雇了两个孩子:孩子 A 和孩子 B

    子 A 终止
  1. 已输入处理程序(
  2. SIGCHLD
     对于孩子 A)
  3. 处理程序为孩子 A
  4. 成功
    waitpid
  5. 新子 B 终止
  6. 处理程序循环并为子 B
  7. 成功
    waitpid
  8. 处理程序退出。
  9. 处理程序已输入(
  10. SIGCHLD
     对于孩子 B)
  11. waitpid
     返回 -1(对于 
    ECHILD

在信号处理程序中,修复方法是更改:

while ((pid = waitpid(-1, &status, WNOHANG))) update_proc(pid, status);
进入:

pid = waitpid(-1, &status, WNOHANG); if (pid) update_proc(pid, status);


请注意,

volatile

head
不是必需的,因为block/unblock
调用。

这是修改/修复的代码。因为我试图进行完整的诊断并快速解决这个问题,所以我对它进行了一些严重的修改[抱歉]:

#include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <string.h> #include <errno.h> #include <stdbool.h> #include <time.h> #include <sys/wait.h> typedef struct ChildProcess { pid_t pid; int status; } ChildProcess; // ProcessNode is a linked list node containing process metadata typedef struct ProcessNode { ChildProcess *proc; struct ProcessNode *next; } ProcessNode; ProcessNode *head = NULL; #define MAXPROC 10 ProcessNode nodelist[MAXPROC]; ChildProcess childlist[MAXPROC]; int forkcnt; volatile int reapcnt; volatile int misscnt; #define prt(_fmt...) \ do { \ char buf[100]; \ size_t len = sprintf(buf,_fmt); \ write(1,buf,len); \ } while (0) #define sysfault(_fmt...) \ do { \ prt(_fmt); \ exit(99); \ } while (0) void exit_with_error(void) { int sverr = errno; sysfault("exit_with_error: sverr=%d (%s)\n",sverr,strerror(sverr)); } #if 1 static void block(sigset_t *old_mask) { sigset_t sigchld_mask; sigemptyset(&sigchld_mask); sigaddset(&sigchld_mask, SIGCHLD); if(sigprocmask(SIG_BLOCK, &sigchld_mask, old_mask)) exit_with_error(); } static void unblock(sigset_t* old_mask) { if(sigprocmask(SIG_SETMASK, old_mask, NULL)) exit_with_error(); } #else void block(void) { sigset_t set; sigemptyset(&set); sigaddset(&set,SIGCHLD); sigprocmask(SIG_BLOCK,&set,NULL); } void unblock(void) { sigset_t set; sigemptyset(&set); sigaddset(&set,SIGCHLD); sigprocmask(SIG_UNBLOCK,&set,NULL); } #endif void register_proc(int pididx,pid_t pid, int *status) { ChildProcess *child = &childlist[pididx]; child->pid = pid; //child->status = status; ProcessNode *new_node = &nodelist[pididx]; new_node->proc = child; new_node->next = head; head = new_node; } int launch_process(int pididx) { sigset_t old_mask; block(&old_mask); // block SIGCHLD forkcnt += 1; pid_t pid = fork(); if (pid != 0) register_proc(pididx,pid,NULL); // register all metadata unblock(&old_mask); // unblock SIGCHLD if (pid == 0) { #if 0 struct timespec ts; ts.tv_sec = 0; ts.tv_nsec = 1000 * pididx; nanosleep(&ts,NULL); #endif exit(0x10 + pididx); } } int update_proc(pid_t pid, int status) { ProcessNode *cursor = head; if (pid <= 0) { int sverr = errno; sysfault("update_proc: fault pid=%d sverr=%d (%s)\n", pid,sverr,strerror(sverr)); } while (cursor != NULL && cursor->proc->pid != pid) { cursor = cursor->next; } if (cursor != NULL) { cursor->proc->status = status; reapcnt += 1; } else misscnt += 1; } void sigchld_handler(int sig) { int status; pid_t pid; #if BUG while ((pid = waitpid(-1, &status, WNOHANG))) update_proc(pid, status); #else pid = waitpid(-1, &status, WNOHANG); if (pid) update_proc(pid, status); #endif } // API function for checking the status pointer // if we are to wait for the process to terminate, we pselect until sigchld eventually updates the status pointer int get_state(int *status, bool wait_for_termination) { sigset_t old_mask; block(&old_mask); // block SIGCHLD, store old mask bool state_unknown = true; bool done = false; while (state_unknown) { if (WIFEXITED(*status) || WIFSIGNALED(*status)) { done = true; } state_unknown = false; if (wait_for_termination && done) { if (pselect(0, NULL, NULL, NULL, NULL, &old_mask)) { if (errno == EINTR) { state_unknown = true; // check status again on interrupt } } } else if (!wait_for_termination) { state_unknown = false; } } unblock(&old_mask); // unblock SIGCHLD return 0; } int main(void) { // ... allocate some memory for other tasks fflush(stdout); // Setup SIGCHLD handler sigset_t sigchld_mask; sigemptyset(&sigchld_mask); sigaddset(&sigchld_mask, SIGCHLD); struct sigaction sa; sa.sa_handler = sigchld_handler; sa.sa_mask = sigchld_mask; sa.sa_flags = SA_RESTART; sigaction(SIGCHLD, &sa, NULL); for (int pididx = 0; pididx < MAXPROC; ++pididx) launch_process(pididx); time_t timebeg = time(NULL); int doneflg = 0; while (1) { time_t timenow = time(NULL); if ((timenow - timebeg) > 4) break; if (reapcnt >= forkcnt) { doneflg = 1; break; } } block(NULL); prt("main: forkcnt=%d reapcnt=%d misscnt=%d\n",forkcnt,reapcnt,misscnt); for (int pididx = 0; pididx < MAXPROC; ++pididx) { ProcessNode *cursor = &nodelist[pididx]; ChildProcess *proc = cursor->proc; prt("main: pididx=%d status=%8.8X\n",pididx,proc->status); } if (! doneflg) sysfault("main: timeout!!!\n"); return 0; }


在上面的代码中,我使用

cpp

 条件来表示旧代码与新代码:

#if 0 // old code #else // new code #endif #if 1 // new code #endif
注意:这可以通过运行文件来清理 

unifdef -k



这是带有原始错误的程序输出(使用

-DBUG=1

编译):

update_proc: fault pid=-1 sverr=10 (No child processes)


这是修复后的程序输出:

main: forkcnt=10 reapcnt=7 misscnt=0 main: pididx=0 status=00001000 main: pididx=1 status=00001100 main: pididx=2 status=00001200 main: pididx=3 status=00001300 main: pididx=4 status=00001400 main: pididx=5 status=00001500 main: pididx=6 status=00001600 main: pididx=7 status=00000000 main: pididx=8 status=00000000 main: pididx=9 status=00000000 main: timeout!!!
请注意,这个

still有一个缺陷。所有进程都应该有终止状态。并且,reapcnt

 应与 
forkcnt
 匹配。

但是,您原来的问题[在某种程度上]得到了解决。我将继续查看此问题,看看是否可以找出计数不匹配的原因。

© www.soinside.com 2019 - 2024. All rights reserved.