我使用 clFFT 库执行 2D 实数到复数 FFT。输出数组的实部和虚部全为零,这是不正确的(我有一个使用 fftw3 实现的、可以正常工作的版本)。输入数组 float *in
是正确的。各维度的长度(在下面的代码中)是
receiver_number
(对于
N0
)和
signal_length
(对于
N1
)。我已经检查了 OpenCL 初始化过程是否有错误,但除了
CL_SUCCESS
之外没有收到其他消息。
cols
和
rows
是复数输出的结果维度(利用埃尔米特对称性)。由于每个复数包含实部和虚部,输出数组的大小为
sizeof(float) * cols * rows * 2
。必须设置步幅(strides),以告诉 clFFT “输入/输出缓冲区所有维度中元素之间的距离”。
auto cols = (signal_length / 2 + 1);
auto rows = receiver_number;
float *data_ptr = in; // in is a float pointer of size receiver_number * signal_length
float *out = new float[cols * rows * 2]; // to store real- & imag-values
of the complex number next to each other (hermitian symmetry)
/* Prepare OpenCL memory objects and place data inside them. */
cl_int err;
cl_mem data_ptr_d = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof(float) * receiver_number * signal_length, NULL, &err );
cl_mem out_d = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof(float) * cols * rows * 2, NULL, &err );
err = clEnqueueWriteBuffer( queue, data_ptr_d, CL_TRUE, 0, sizeof(float) * receiver_number * signal_length, data_ptr, 0, NULL, NULL);
/* Create a default plan for FFT. */
const size_t N0 = receiver_number, N1 = signal_length;
clfftPlanHandle forward_plan;
size_t clLengthsForward[2] = {N0, N1};
err = clfftCreateDefaultPlan(&forward_plan, context, CLFFT_2D, clLengthsForward);
/* Set input and output stride */
size_t clStridesForwardIn[2] = {1, (unsigned long)signal_length};
size_t clStridesForwardOut[2] = {1, (unsigned long)cols};
err = clfftSetPlanInStride(forward_plan, CLFFT_2D, clStridesForwardIn);
err = clfftSetPlanOutStride(forward_plan, CLFFT_2D, clStridesForwardOut);
err = clfftSetPlanDistance(forward_plan, receiver_number * signal_length, cols * rows * 2);
/* Set plan parameters. */
err = clfftSetPlanPrecision(forward_plan, CLFFT_SINGLE);
err = clfftSetLayout(forward_plan, CLFFT_REAL, CLFFT_HERMITIAN_INTERLEAVED);
err = clfftSetResultLocation(forward_plan, CLFFT_OUTOFPLACE); // store output in seperate array
/* Bake the plan. */
err = clfftBakePlan(forward_plan, 1, &queue, NULL, NULL);
/* Execute the plan. */
err = clfftEnqueueTransform(forward_plan, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &data_ptr_d, &out_d, NULL);
/* Wait for calculations to be finished. */
err = clFinish(queue);
/* Fetch results of calculations. */
err = clEnqueueReadBuffer( queue, out_d, CL_TRUE, 0, sizeof(float)* cols * rows * 2, out, 0, NULL, NULL );
我得到的复数值的实部和虚部全为零。我无法解释这个错误,因为所设置的步幅和内存距离在我看来是合理的。所有内存分配/传输和内核执行调用都返回 CL_SUCCESS
。
clLengths
数组上的
N0
和
N1
。我只是照搬了可以正常工作的
fftw3
实现中
fftw_plan_dft_r2c_2d()
函数调用里的
N0
和
N1
值,假设它们是等效的。