OpenCL 内核在 GPU 上生成不正确的图像

问题描述 投票:0回答:1

我有一个 OpenCL 1.2 程序,可以将等距柱状 hdr 图像转换为立方体贴图。

在我的 i7-9750H CPU 上一切正常,但当我在 GTX 1650 GPU 上运行程序时,结果始终不正确。

如果相关的话,我的 GPU 驱动程序版本是 536.99,CPU 驱动程序版本是 7.6.0.0228。

正确 错了

这是代码:

constant sampler_t srcSampler =
    CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;

// These transforms specify the directions based on the cube face
// They are based on
// https://www.khronos.org/opengl/wiki_opengl/images/CubeMapAxes.png The order
// is +X, -X, +Y, -Y, +Z, -Z Using the reference image the transforms are
// (horizontal face axis, vertical face axis, face direction)
__constant float3 xTransforms[6] = {
    (float3)(0.0f, 0.0f, 1.0f), (float3)(0.0f, 0.0f, -1.0f),
    (float3)(1.0f, 0.0f, 0.0f), (float3)(1.0f, 0.0f, 0.0f),
    (float3)(1.0f, 0.0f, 0.0f), (float3)(-1.0f, 0.0f, 0.0f)};
__constant float3 yTransforms[6] = {
    (float3)(0.0, -1.0f, 0.0f),  (float3)(0.0f, -1.0f, 0.0f),
    (float3)(0.0f, 0.0f, 1.0f),  (float3)(0.0f, 0.0f, -1.0f),
    (float3)(0.0f, -1.0f, 0.0f), (float3)(0.0f, -1.0f, 0.0f)};
__constant float3 zTransforms[6] = {
    (float3)(-1.0f, 0.0f, 0.0f), (float3)(1.0f, 0.0f, 0.0f),
    (float3)(0.0f, 1.0f, 0.0f),  (float3)(0.0f, -1.0f, 0.0f),
    (float3)(0.0f, 0.0f, 1.0f),  (float3)(0.0f, 0.0f, -1.0f)};

float2 projectSphericalMap(float3 dir) {
  float2 uv = (float2)(atan2pi(dir.z, dir.x) * 0.5, asinpi(dir.y));
  uv += (float2)(0.5f, 0.5f);
  return uv;
}

// The kernel is invoked for every pixel on every face of the cubemap
// 'size' is the size of a cube map face
// 'sizefac' is 2/(size-1) precomputed
__kernel void reproject_environment(__read_only image2d_t srcImage,
                                    __write_only image2d_t dstImage, int size,
                                    float sizefac) {
  int outu = get_global_id(0);
  int outv = get_global_id(1);
  int face = get_global_id(2);

  // This check is probably unnecessary
  if (outu >= size || outv >= size || face >= 6) {
    return;
  }

  // The value range is [-1, 1]
  float horizontal = (float)(outu)*sizefac - 1.0;
  float vertical = (float)(outv)*sizefac - 1.0;

  float3 vec = (float3)(horizontal, vertical, 1.0f);

  float x = dot(vec, xTransforms[face]);
  float y = dot(vec, yTransforms[face]);
  float z = dot(vec, zTransforms[face]);

  float3 dir = (float3)(x, y, z);

  float2 uv = projectSphericalMap(normalize(dir));
  float4 color = read_imagef(srcImage, srcSampler, uv);
  // the cube map faces are stacked vertically
  write_imagef(dstImage, (int2)(outu, outv + size * face), color);
}

这是我第一次使用 OpenCL,所以错误可能非常明显。

gpu opencl gpgpu
1个回答
0
投票

添加一些

printf
进行调试后,我注意到在我的 GPU 上,从
float3
数组读取的
Transform
是错误的。

问题是编译器将数组元素对齐到 4 个浮点,这导致在索引元素时发生 1 个浮点移位。将

float3
更改为
float4
解决了问题。

© www.soinside.com 2019 - 2024. All rights reserved.