clCloneKernel导致分段错误

问题描述 投票:1回答:1

我正在使用clCloneKernel进行实验,以了解如何由多个主机线程使用一个内核。 OpenCL规范声明设置内核参数(除其他事项外)不是线程安全的。因此,如果需要由多个主机线程调用同一个内核,则使用clCloneKernel应该可以解决。

问题是,一旦在初始化的cl_kernel对象上调用clCloneKernel(无论它是在设置参数之前还是之后),都会导致程序出现段错误。

#include <iostream>
#include <fstream>
#include <sstream>
#include <string>

#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

using namespace std;

// Capacity of the device-ID array and the maximum number of devices
// requested from the platform in main().
const int MAXNUMDEV = 10;

// OpenCL C source of the "hello" kernel: every work item prints its global ID
// and its work-group ID. The trailing backslashes are line continuations, so
// the resulting string contains no newline characters.
string kernSource = "       \
kernel void hello()   \
{                     \
   int ID = get_global_id(0);  \
   int grID = get_group_id(0);  \
   printf(\"Work item %i from group %i says hello!\\n\", ID, grID); \
}";

//============================================
// Releases the given OpenCL objects, skipping any handle that is still zero.
// Objects are released in the order kernel, program, command queue, context.
void cleanUp (cl_context c, cl_command_queue q, cl_program p, cl_kernel k)
{
  if (k)
    {
      clReleaseKernel (k);
    }
  if (p)
    {
      clReleaseProgram (p);
    }
  if (q)
    {
      clReleaseCommandQueue (q);
    }
  if (c)
    {
      clReleaseContext (c);
    }
}
//============================================
int main ()
{
  cl_int errNum;
  cl_uint numPlatforms;
  cl_platform_id firstPlatformId;
  cl_device_id devID[MAXNUMDEV];
  cl_uint numDev;
  cl_context cont = 0;          // initialize for cleanup check
  cl_command_queue q = 0;
  cl_program pr = 0;
  cl_kernel kernel = 0;

  // Get a reference to an object representing a platform 
  errNum = clGetPlatformIDs (1, &firstPlatformId, &numPlatforms);
  if (errNum != CL_SUCCESS || numPlatforms <= 0)
    {
      cerr << "Failed to find any OpenCL platforms." << endl;
      return 1;
    }

  // Get the device IDs matching the CL_DEVICE_TYPE parameter, up to the MAXNUMDEV limit
  errNum = clGetDeviceIDs (firstPlatformId, CL_DEVICE_TYPE_ALL, MAXNUMDEV, devID, &numDev);
  if (errNum != CL_SUCCESS || numDev <= 0)
    {
      cerr << "Failed to find any OpenCL devices." << endl;
      return 2;
    }

  char devName[100];
  size_t nameLen;
  for (int i = 0; i < numDev; i++)
    {
      errNum = clGetDeviceInfo (devID[i], CL_DEVICE_NAME, 100, (void *) devName, &nameLen);
      if (errNum == CL_SUCCESS)
        cout << "Device " << i << " is " << devName << endl;
    }


  cl_context_properties prop[] = {
    CL_CONTEXT_PLATFORM,
    (cl_context_properties) firstPlatformId,
    0                           // termination
  };

  cont = clCreateContext (prop, numDev, devID, NULL,    // no callback function
                          NULL, // no data for callback
                          &errNum);
  if (errNum != CL_SUCCESS)
    {
      cerr << "Failed to create a context." << endl;
      cleanUp (cont, q, pr, kernel);
      return 1;
    }

  cl_queue_properties qprop[] = {
    CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
    0
  };
  q = clCreateCommandQueueWithProperties (cont, devID[0], qprop, &errNum);
  if (errNum != CL_SUCCESS)
    {
      cerr << "Failed to create a command queue" << endl;
      cleanUp (cont, q, pr, kernel);
      return 1;
    }

  const char *src = kernSource.c_str ();
  size_t len = kernSource.size ();
  pr = clCreateProgramWithSource (cont, 1, (const char **) (&src), &len, &errNum);
  if (errNum != CL_SUCCESS)
    {
      cerr << "Failed to create program." << endl;
      cleanUp (cont, q, pr, kernel);
      return 1;
    }

  errNum = clBuildProgram (pr, 1, devID, NULL, NULL, NULL);
  if (errNum != CL_SUCCESS)
    {
      cerr << "Failed to build program" << endl;
      cleanUp (cont, q, pr, kernel);
      return 1;
    }


  kernel = clCreateKernel (pr, "hello", &errNum);
  if (errNum != CL_SUCCESS || kernel == NULL)
    {
      cerr << "Failed to create kernel" << endl;
      cleanUp (cont, q, pr, kernel);
      return 1;
    }

  cl_kernel copyKern = clCloneKernel(kernel, &errNum); // <<<<<<<<<<<<<<<

  // work item index space and group size setup
  size_t idxSpace[] = { 12 };
  size_t localWorkSize[] = { 3 };

  cl_event completeEv;
  errNum = clEnqueueNDRangeKernel (q, kernel, 1, NULL, idxSpace, localWorkSize, 0, NULL, &completeEv);

  // wait for enqueued command to finish
  clWaitForEvents (1, &completeEv);

  cleanUp (cont, q, pr, kernel);
  return 0;
}

我正在使用clCloneKernel进行实验,以了解如何由多个主机线程使用一个内核。 OpenCL规范声明设置内核参数(除其他事项外)不是线程-...

c++ c thread-safety opencl
1个回答
0
投票

clCloneKernel()是OpenCL 2.1引入的。您的OpenCL平台是否实施此版本的标准?我怀疑可能不是,因此崩溃了。

© www.soinside.com 2019 - 2024. All rights reserved.