fork后CUDA初始化错误

问题描述 投票:0回答:1

调用 fork() 后出现“初始化错误”。如果我在没有 fork 的情况下运行相同的程序,一切正常。

if (fork() == 0) {
    ...
    cudaMalloc(....);
    ...
}

什么原因会导致这种情况?

下面是一个完整的示例。如果我注释掉 cudaGetDeviceCount 调用,它就可以正常工作。

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <cuda_runtime.h>

#define PERR(call) \
  if (call) {\
   fprintf(stderr, "%s:%d Error [%s] on "#call"\n", __FILE__, __LINE__,\
      cudaGetErrorString(cudaGetLastError()));\
   exit(1);\
  }

int
main(int argc, char **argv)
{
  float *v_d;
  int gpucount;

  cudaGetDeviceCount(&gpucount);

  if (fork() == 0) {
    cudaSetDevice(0);
    PERR(cudaMalloc(&v_d, 1000*sizeof(float)));
  }
  wait(NULL);
  return 0;
}

简单的Makefile:

PROGS = fork
CUDA_PATH = /usr/local/cuda
CXXFLAGS = -g -O0 -Wall
CXXINCLUDES = -I$(CUDA_PATH)/include
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(CXX) -Xcompiler "$(CXXFLAGS)"

fork: fork.cxx
        $(NVCC) $^ -o $@ $(LIBS)

clean:
        (rm $(PROGS) *.o)

在这种情况下,我只是尝试从父进程中获取可用设备的数量。这个解决方法可以做到:

  if (fork() == 0) {
    PERR(cudaGetDeviceCount(&gpucount));
    return(gpucount);
  }
  wait(&gpucount);
  gpucount =  WEXITSTATUS(gpucount);
cuda
1个回答
18
投票

fork()
创建一个子进程。进程有自己的地址空间。由于多种原因,CUDA 上下文无法在两个不同的进程之间共享,其中之一是各种指针在不同的地址空间中将毫无意义。

如果您在

fork()
之前创建 CUDA 上下文,则无法在子进程中使用它。
cudaSetDevice(0);
调用尝试共享 CUDA 上下文,当您调用
cudaGetDeviceCount();

时在父进程中隐式创建 正如您所暗示的,解决方案是在父进程或子进程中执行 CUDA 工作。如果您处于多设备系统中,则应该可以将单独的设备分配给单独的进程(CUDA

simpleIPC 示例代码 正是这样做的)。 (关键是在fork之前不要创建CUDA上下文。)

您可能对

这个问题/答案这个感兴趣。

这是一个完整的示例(需要 2 个 CUDA 设备),显示使用单独 GPU 的子进程和父进程:

$ cat t345.cu #include <unistd.h> /* Symbolic Constants */ #include <sys/types.h> /* Primitive System Data Types */ #include <errno.h> /* Errors */ #include <stdio.h> /* Input/Output */ #include <sys/wait.h> /* Wait for Process Termination */ #include <stdlib.h> /* General Utilities */ #define cudaCheckErrors(msg) \ do { \ cudaError_t __err = cudaGetLastError(); \ if (__err != cudaSuccess) { \ fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \ msg, cudaGetErrorString(__err), \ __FILE__, __LINE__); \ fprintf(stderr, "*** FAILED - ABORTING\n"); \ exit(1); \ } \ } while (0) __global__ void addkernel(int *data){ *data += 1; } int main() { pid_t childpid; /* variable to store the child's pid */ int retval; /* child process: user-provided return code */ int status; /* parent process: child's exit status */ /* only 1 int variable is needed because each process would have its own instance of the variable here, 2 int variables are used for clarity */ /* now create new process */ childpid = fork(); if (childpid >= 0) /* fork succeeded */ { if (childpid == 0) /* fork() returns 0 to the child process */ { printf("CHILD: I am the child process!\n"); printf("CHILD: Here's my PID: %d\n", getpid()); printf("CHILD: My parent's PID is: %d\n", getppid()); printf("CHILD: The value of my copy of childpid is: %d\n", childpid); int *h_a, *d_a; h_a = (int *)malloc(sizeof(int)); cudaSetDevice(0); cudaCheckErrors("CHILD cudaSetDevice fail"); cudaMalloc(&d_a, sizeof(int)); cudaCheckErrors("cudaMalloc fail"); *h_a = 1; cudaMemcpy(d_a, h_a, sizeof(int), cudaMemcpyHostToDevice); cudaCheckErrors("cudaMemcpy H2D fail"); addkernel<<<1,1>>>(d_a); cudaDeviceSynchronize(); cudaCheckErrors("kernel fail"); cudaMemcpy(h_a, d_a, sizeof(int), cudaMemcpyDeviceToHost); cudaCheckErrors("cudaMemcpy D2H fail"); printf("CHILD: result: %d\n", *h_a); printf("CHILD: Sleeping for 1 second...\n"); sleep(1); /* sleep for 1 second */ cudaDeviceReset(); printf("CHILD: Enter an exit value (0 to 255): "); scanf(" %d", &retval); printf("CHILD: Goodbye!\n"); exit(retval); /* child exits with user-provided return code */ } else /* fork() returns new pid to the parent process */ { printf("PARENT: I am the parent process!\n"); printf("PARENT: Here's my PID: %d\n", getpid()); printf("PARENT: The value of my copy of childpid is %d\n", childpid); printf("PARENT: I will now wait for my child to exit.\n"); int *h_a, *d_a; h_a = (int *)malloc(sizeof(int)); cudaSetDevice(1); cudaCheckErrors("PARENT cudaSetDevice fail"); cudaMalloc(&d_a, sizeof(int)); cudaCheckErrors("cudaMalloc fail"); *h_a = 2; cudaMemcpy(d_a, h_a, sizeof(int), cudaMemcpyHostToDevice); cudaCheckErrors("cudaMemcpy H2D fail"); addkernel<<<1,1>>>(d_a); cudaDeviceSynchronize(); cudaCheckErrors("kernel fail"); cudaMemcpy(h_a, d_a, sizeof(int), cudaMemcpyDeviceToHost); cudaCheckErrors("cudaMemcpy D2H fail"); printf("PARENT: result: %d\n", *h_a); wait(&status); /* wait for child to exit, and store its status */ printf("PARENT: Child's exit code is: %d\n", WEXITSTATUS(status)); cudaSetDevice(0); cudaCheckErrors("PARENT cudaSetDevice 2 fail"); int *h_a2, *d_a2; cudaMalloc(&d_a2, sizeof(int)); cudaCheckErrors("cudaMalloc fail"); h_a2 = (int *)malloc(sizeof(int)); *h_a2 = 5; cudaMemcpy(d_a2, h_a2, sizeof(int), cudaMemcpyHostToDevice); cudaCheckErrors("cudaMemcpy H2D fail"); addkernel<<<1,1>>>(d_a2); cudaDeviceSynchronize(); cudaCheckErrors("kernel fail"); cudaMemcpy(h_a2, d_a2, sizeof(int), cudaMemcpyDeviceToHost); cudaCheckErrors("cudaMemcpy D2H fail"); printf("PARENT: result2: %d\n", *h_a2); printf("PARENT: Goodbye!\n"); exit(0); /* parent exits */ } } else /* fork returns -1 on failure */ { perror("fork"); /* display error message */ exit(0); } } $ nvcc -arch=sm_20 -o t345 t345.cu $ ./t345 CHILD: I am the child process! CHILD: Here's my PID: 23603 CHILD: My parent's PID is: 23602 CHILD: The value of my copy of childpid is: 0 PARENT: I am the parent process! PARENT: Here's my PID: 23602 PARENT: The value of my copy of childpid is 23603 PARENT: I will now wait for my child to exit. CHILD: result: 2 CHILD: Sleeping for 1 second... PARENT: result: 3 CHILD: Enter an exit value (0 to 255): 10 CHILD: Goodbye! PARENT: Child's exit code is: 10 PARENT: result2: 6 PARENT: Goodbye! $
(修改自

此处

编辑: 我引用的

示例代码已更新:

//Keep in mind that CUDA has minimal support for fork() without a // corresponding exec() in the child process, but in this case our // spawnProcess will always exec, so no need to worry.
现在的期望是,除了在

fork()

之前不创建 CUDA 上下文之外,每个子进程还应该
立即调用exec()

© www.soinside.com 2019 - 2024. All rights reserved.