我有一些启动进程的 C 代码。对于每个进程,它都会在结构链接列表中注册一些元数据。然后,SIGCHLD处理程序更新所述元数据。结构是这样的:
// block/unblock utilities
static sigset_t block () {
sigset_t sigchld_mask, old_mask;
sigemptyset(&sigchld_mask);
sigaddset(&sigchld_mask, SIGCHLD);
if(sigprocmask(SIG_BLOCK, &sigchld_mask, &old_mask))
exit_with_error();
return old_mask;
}
static void unblock (sigset_t* old_mask) {
if(sigprocmask(SIG_SETMASK, old_mask, NULL)) exit_with_error();
}
typedef struct ChildProcess {
pid_t pid;
int* status;
} ChildProcess;
// ProcessNode is a linked list node containing process metadata
typedef struct ProcessNode {
ChildProcess* proc;
volatile struct ProcessNode* next;
} ProcessNode;
volatile ProcessNode * head = NULL;
int launch_process ( ... ) {
block(); // block SIGCHLD
... // launch process with posix_spawn()
register_proc(...); // register all metadata
unblock(); // unblock SIGCHLD
}
void register_proc (pid_t pid, int* status) {
ChildProcess* child = (ChildProcess*)malloc(sizeof(ChildProcess));
child->pid = pid;
child->status = status;
volatile ProcessNode* node = (ProcessNode*)malloc(sizeof(ProcessNode));
new_node->proc = child;
new_node->next = head;
head = new_node;
return 0;
}
int update_proc (pid_t pid, int status) {
volatile ProcessNode* cursor = head;
while(cursor != NULL && curr->proc->pid != pid) {
cursor = cursor->next;
}
if(cursor != NULL) {
*(cursor->proc->status) = status;
}
}
void sigchld_handler (int sig) {
int status;
pid_t pid;
while((pid = waitpid(-1, &status, WNOHANG))) {
update_proc(pid, status);
}
}
// API function for checking the status pointer
// if we are to wait for the process to terminate, we pselect until sigchld eventually updates the status pointer
int get_state (int* status, bool wait_for_termination) {
sigset_t old_mask = block(); // block SIGCHLD, store old mask
bool state_unknown = true;
bool done = false;
while(state_unknown) {
if(WIFEXITED(*status) || WIFSIGNALED(*status)) {
done = true;
}
state_unknown = false;
if(wait_for_termination && done) {
if(pselect(0, NULL, NULL, NULL, NULL, &old_mask)) {
if(errno == EINTR) {
state_unknown = true; // check status again on interrupt
}
}
}
else if(!wait_for_termination) {
state_unknown = false;
}
}
unblock(); // unblock SIGCHLD
return 0;
}
int main () {
// ... allocate some memory for other tasks
//Setup SIGCHLD handler
sigset_t sigchld_mask;
sigemptyset(&sigchld_mask);
sigaddset(&sigchld_mask, SIGCHLD);
struct sigaction sa;
sa.sa_handler = sigchld_handler;
sa.sa_mask = sigchld_mask;
sa.sa_flags = SA_RESTART;
sigaction(SIGCHLD, &sa, NULL);
}
有时这可行,但有时会失败,因为
update_status
无法找到匹配的 ProcessNode
。发生这种情况是因为当 head
执行时 NULL
是 sigchld_handler
——即使它之前和之后都是非空的。我省略了很多细节,但每次写入或读取 SIGCHLD
时我都会阻止 head
,而且它是不稳定的。我认为这两件事足以应对任何竞争条件。
对于可能出错的事情还有其他想法吗?
通过正确的初始化,您应该为每个终止的进程获得一个
SIGCHLD
。
因此,您不应该在处理程序中的 waitpid
上循环。考虑:
您解雇了两个孩子:孩子 A 和孩子 B
SIGCHLD
对于孩子 A)
waitpid
waitpid
SIGCHLD
对于孩子 B)
waitpid
返回 -1(对于
ECHILD
)
while ((pid = waitpid(-1, &status, WNOHANG)))
update_proc(pid, status);
进入:
pid = waitpid(-1, &status, WNOHANG);
if (pid)
update_proc(pid, status);
volatile
的
head
是不是必需的,因为
block/unblock
调用。这是修改/修复的代码。因为我试图进行完整的诊断并快速解决这个问题,所以我对它进行了一些严重的修改[抱歉]:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <stdbool.h>
#include <time.h>
#include <sys/wait.h>
typedef struct ChildProcess {
pid_t pid;
int status;
} ChildProcess;
// ProcessNode is a linked list node containing process metadata
typedef struct ProcessNode {
ChildProcess *proc;
struct ProcessNode *next;
} ProcessNode;
ProcessNode *head = NULL;
#define MAXPROC 10
ProcessNode nodelist[MAXPROC];
ChildProcess childlist[MAXPROC];
int forkcnt;
volatile int reapcnt;
volatile int misscnt;
#define prt(_fmt...) \
do { \
char buf[100]; \
size_t len = sprintf(buf,_fmt); \
write(1,buf,len); \
} while (0)
#define sysfault(_fmt...) \
do { \
prt(_fmt); \
exit(99); \
} while (0)
void
exit_with_error(void)
{
int sverr = errno;
sysfault("exit_with_error: sverr=%d (%s)\n",sverr,strerror(sverr));
}
#if 1
static void
block(sigset_t *old_mask)
{
sigset_t sigchld_mask;
sigemptyset(&sigchld_mask);
sigaddset(&sigchld_mask, SIGCHLD);
if(sigprocmask(SIG_BLOCK, &sigchld_mask, old_mask))
exit_with_error();
}
static void
unblock(sigset_t* old_mask)
{
if(sigprocmask(SIG_SETMASK, old_mask, NULL)) exit_with_error();
}
#else
void
block(void)
{
sigset_t set;
sigemptyset(&set);
sigaddset(&set,SIGCHLD);
sigprocmask(SIG_BLOCK,&set,NULL);
}
void
unblock(void)
{
sigset_t set;
sigemptyset(&set);
sigaddset(&set,SIGCHLD);
sigprocmask(SIG_UNBLOCK,&set,NULL);
}
#endif
void
register_proc(int pididx,pid_t pid, int *status)
{
ChildProcess *child = &childlist[pididx];
child->pid = pid;
//child->status = status;
ProcessNode *new_node = &nodelist[pididx];
new_node->proc = child;
new_node->next = head;
head = new_node;
}
int
launch_process(int pididx)
{
sigset_t old_mask;
block(&old_mask); // block SIGCHLD
forkcnt += 1;
pid_t pid = fork();
if (pid != 0)
register_proc(pididx,pid,NULL); // register all metadata
unblock(&old_mask); // unblock SIGCHLD
if (pid == 0) {
#if 0
struct timespec ts;
ts.tv_sec = 0;
ts.tv_nsec = 1000 * pididx;
nanosleep(&ts,NULL);
#endif
exit(0x10 + pididx);
}
}
int
update_proc(pid_t pid, int status)
{
ProcessNode *cursor = head;
if (pid <= 0) {
int sverr = errno;
sysfault("update_proc: fault pid=%d sverr=%d (%s)\n",
pid,sverr,strerror(sverr));
}
while (cursor != NULL && cursor->proc->pid != pid) {
cursor = cursor->next;
}
if (cursor != NULL) {
cursor->proc->status = status;
reapcnt += 1;
}
else
misscnt += 1;
}
void
sigchld_handler(int sig)
{
int status;
pid_t pid;
#if BUG
while ((pid = waitpid(-1, &status, WNOHANG)))
update_proc(pid, status);
#else
pid = waitpid(-1, &status, WNOHANG);
if (pid)
update_proc(pid, status);
#endif
}
// API function for checking the status pointer
// if we are to wait for the process to terminate, we pselect until sigchld eventually updates the status pointer
int
get_state(int *status, bool wait_for_termination)
{
sigset_t old_mask;
block(&old_mask); // block SIGCHLD, store old mask
bool state_unknown = true;
bool done = false;
while (state_unknown) {
if (WIFEXITED(*status) || WIFSIGNALED(*status)) {
done = true;
}
state_unknown = false;
if (wait_for_termination && done) {
if (pselect(0, NULL, NULL, NULL, NULL, &old_mask)) {
if (errno == EINTR) {
state_unknown = true; // check status again on interrupt
}
}
}
else if (!wait_for_termination) {
state_unknown = false;
}
}
unblock(&old_mask); // unblock SIGCHLD
return 0;
}
int
main(void)
{
// ... allocate some memory for other tasks
fflush(stdout);
// Setup SIGCHLD handler
sigset_t sigchld_mask;
sigemptyset(&sigchld_mask);
sigaddset(&sigchld_mask, SIGCHLD);
struct sigaction sa;
sa.sa_handler = sigchld_handler;
sa.sa_mask = sigchld_mask;
sa.sa_flags = SA_RESTART;
sigaction(SIGCHLD, &sa, NULL);
for (int pididx = 0; pididx < MAXPROC; ++pididx)
launch_process(pididx);
time_t timebeg = time(NULL);
int doneflg = 0;
while (1) {
time_t timenow = time(NULL);
if ((timenow - timebeg) > 4)
break;
if (reapcnt >= forkcnt) {
doneflg = 1;
break;
}
}
block(NULL);
prt("main: forkcnt=%d reapcnt=%d misscnt=%d\n",forkcnt,reapcnt,misscnt);
for (int pididx = 0; pididx < MAXPROC; ++pididx) {
ProcessNode *cursor = &nodelist[pididx];
ChildProcess *proc = cursor->proc;
prt("main: pididx=%d status=%8.8X\n",pididx,proc->status);
}
if (! doneflg)
sysfault("main: timeout!!!\n");
return 0;
}
cpp
条件来表示旧代码与新代码:
#if 0
// old code
#else
// new code
#endif
#if 1
// new code
#endif
注意:这可以通过运行文件来清理 unifdef -k
-DBUG=1
编译):
update_proc: fault pid=-1 sverr=10 (No child processes)
main: forkcnt=10 reapcnt=7 misscnt=0
main: pididx=0 status=00001000
main: pididx=1 status=00001100
main: pididx=2 status=00001200
main: pididx=3 status=00001300
main: pididx=4 status=00001400
main: pididx=5 status=00001500
main: pididx=6 status=00001600
main: pididx=7 status=00000000
main: pididx=8 status=00000000
main: pididx=9 status=00000000
main: timeout!!!
请注意,这个still有一个缺陷。所有进程都应该有终止状态。并且,reapcnt
应与
forkcnt
匹配。但是,您原来的问题[在某种程度上]得到了解决。我将继续查看此问题,看看是否可以找出计数不匹配的原因。