我有一个 OpenCL 1.2 程序,可以将等距柱状 hdr 图像转换为立方体贴图。
在我的 i7-9750H CPU 上一切正常,但当我在 GTX 1650 GPU 上运行程序时,结果始终不正确。
如果相关的话,我的 GPU 驱动程序版本是 536.99,CPU 驱动程序版本是 7.6.0.0228。
这是代码:
constant sampler_t srcSampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
// These transforms specify the directions based on the cube face
// They are based on
// https://www.khronos.org/opengl/wiki_opengl/images/CubeMapAxes.png The order
// is +X, -X, +Y, -Y, +Z, -Z Using the reference image the transforms are
// (horizontal face axis, vertical face axis, face direction)
__constant float3 xTransforms[6] = {
(float3)(0.0f, 0.0f, 1.0f), (float3)(0.0f, 0.0f, -1.0f),
(float3)(1.0f, 0.0f, 0.0f), (float3)(1.0f, 0.0f, 0.0f),
(float3)(1.0f, 0.0f, 0.0f), (float3)(-1.0f, 0.0f, 0.0f)};
__constant float3 yTransforms[6] = {
(float3)(0.0, -1.0f, 0.0f), (float3)(0.0f, -1.0f, 0.0f),
(float3)(0.0f, 0.0f, 1.0f), (float3)(0.0f, 0.0f, -1.0f),
(float3)(0.0f, -1.0f, 0.0f), (float3)(0.0f, -1.0f, 0.0f)};
__constant float3 zTransforms[6] = {
(float3)(-1.0f, 0.0f, 0.0f), (float3)(1.0f, 0.0f, 0.0f),
(float3)(0.0f, 1.0f, 0.0f), (float3)(0.0f, -1.0f, 0.0f),
(float3)(0.0f, 0.0f, 1.0f), (float3)(0.0f, 0.0f, -1.0f)};
float2 projectSphericalMap(float3 dir) {
float2 uv = (float2)(atan2pi(dir.z, dir.x) * 0.5, asinpi(dir.y));
uv += (float2)(0.5f, 0.5f);
return uv;
}
// The kernel is invoked for every pixel on every face of the cubemap
// 'size' is the size of a cube map face
// 'sizefac' is 2/(size-1) precomputed
__kernel void reproject_environment(__read_only image2d_t srcImage,
__write_only image2d_t dstImage, int size,
float sizefac) {
int outu = get_global_id(0);
int outv = get_global_id(1);
int face = get_global_id(2);
// This check is probably unnecessary
if (outu >= size || outv >= size || face >= 6) {
return;
}
// The value range is [-1, 1]
float horizontal = (float)(outu)*sizefac - 1.0;
float vertical = (float)(outv)*sizefac - 1.0;
float3 vec = (float3)(horizontal, vertical, 1.0f);
float x = dot(vec, xTransforms[face]);
float y = dot(vec, yTransforms[face]);
float z = dot(vec, zTransforms[face]);
float3 dir = (float3)(x, y, z);
float2 uv = projectSphericalMap(normalize(dir));
float4 color = read_imagef(srcImage, srcSampler, uv);
// the cube map faces are stacked vertically
write_imagef(dstImage, (int2)(outu, outv + size * face), color);
}
这是我第一次使用 OpenCL,所以错误可能非常明显。
添加一些
printf
进行调试后,我注意到在我的 GPU 上,从 float3
数组读取的 Transform
是错误的。
问题是编译器将数组元素对齐到 4 个浮点,这导致在索引元素时发生 1 个浮点移位。将
float3
更改为 float4
解决了问题。