我想将 GPU 的纹理内存用于 1D 数组,并在内核中使用它。 我创建一个纹理对象,并将
resDesc.res.linear.devPtr
分配给设备数组,以将所需的数组绑定到纹理对象。问题是,当我获取数据时,它与我绑定到纹理内存的数据不同。这段代码的输出应该是 1, 4, 9, ... 而它是 0,0,0,....
#include<cuda.h>
#include<cuda_runtime.h>
#include<iostream>
#include<stdio.h>
using namespace std;
// Simple transformation kernel
__global__ void squareKernel(float* output,float *dh, int size, cudaTextureObject_t texObj) {
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
if(x>size)return;
float y=tex1D<float>(texObj, x);
printf("%d, %f, %f\n", x, y, dh[x]);
output[x]=y*y;
}
#define width 10
int main() {
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(width, 0, 0, 0,
cudaChannelFormatKindFloat);
float *hA=(float *)malloc(width*sizeof(float));
for(int i=0;i<width;i++){
hA[i]=i;
}
float *dA;
cudaMalloc((void **)&dA, width*sizeof(float));
cudaMemcpy(dA, hA, width*sizeof(float), cudaMemcpyHostToDevice);
cudaResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
resDesc.resType = cudaResourceTypeLinear;
resDesc.res.linear.devPtr=dA;
resDesc.res.linear.sizeInBytes=width*sizeof(float);
// Specify texture object parameters
cudaTextureDesc texDesc;
memset(&texDesc, 0, sizeof(texDesc));
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeElementType;
texDesc.normalizedCoords = 0;
// Create texture object
cudaTextureObject_t texObj = 0;
cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
float* output;
cudaMalloc(&output, width * sizeof(float));
squareKernel<<<1, width>>>(output, dA, width,texObj);
cudaMemcpy(hA, output, width*sizeof(float), cudaMemcpyDeviceToHost);
for(int i=0; i<width;i++){
cout<<i<<","<<hA[i]<<endl;
}
// Destroy texture object
cudaDestroyTextureObject(texObj);
// Free device memory
cudaFree(dA);
free(hA);
return 0;
}
我认为你可以简单地将
tex1D<float>(texObj, x);
替换为 tex1Dfetch<float>(texObj, x);
当资源数据存储在
tex1D
中时,使用 cuArray
。