我有三个文件:
main.cu
#include <cuda_runtime.h>
#include <stdio.h>
#include "kernels.cuh"
// Constant-memory variable written by the host via setConstantValue().
// kernels.cu refers to this symbol through an `extern` declaration; that only
// resolves to this same object when the project is built with relocatable
// device code (-rdc=true). Otherwise nvcc treats the extern declaration as a
// separate static definition (warning #20044-D) and the kernel reads its own copy.
__constant__ float deviceConstVar;

/**
 * Copies `value` from the host into the device constant deviceConstVar.
 *
 * Errors returned by the CUDA runtime are reported on stderr instead of being
 * silently discarded, so a failed copy is visible to the user.
 *
 * @param value  The float to store in deviceConstVar.
 */
void setConstantValue(float value) {
    cudaError_t status = cudaMemcpyToSymbol(deviceConstVar, &value, sizeof(float));
    if (status != cudaSuccess) {
        fprintf(stderr, "cudaMemcpyToSymbol(deviceConstVar) failed: %s\n",
                cudaGetErrorString(status));
        return;
    }

    // Wait for the device so that any pending error surfaces here.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize after cudaMemcpyToSymbol failed: %s\n",
                cudaGetErrorString(status));
    }
}
/**
 * Program entry point: stores 1.23f in the device constant, launches
 * printConstantValue with a single thread, and waits for it to finish.
 *
 * @return 0 on success, 1 if the kernel launch or its execution reported
 *         a CUDA error.
 */
int main() {
    setConstantValue(1.23f);

    printConstantValue<<<1, 1>>>();

    // A kernel launch returns no status itself; launch-configuration errors
    // must be fetched explicitly.
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "printConstantValue launch failed: %s\n",
                cudaGetErrorString(status));
        return 1;
    }

    // Block until the kernel has finished; errors raised while the kernel
    // was running are returned here.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        fprintf(stderr, "printConstantValue execution failed: %s\n",
                cudaGetErrorString(status));
        return 1;
    }
    return 0;
}
kernels.cu
#include <stdio.h>
#include <cuda_runtime.h>
// Declaration of the constant-memory variable that main.cu defines.
// NOTE: when the project is built without relocatable device code, nvcc
// reports warning #20044-D and treats this declaration as a static
// definition local to this file; build with -rdc=true so that it refers
// to the variable defined in main.cu.
extern __constant__ float deviceConstVar;

// Kernel: reads deviceConstVar and prints it with device-side printf.
// Takes no arguments.
__global__ void printConstantValue() {
    const float currentValue = deviceConstVar;
    printf("deviceConstVar = %f\n", currentValue);
}
kernels.cuh
// kernels.cuh
// Declares the CUDA kernel implemented in kernels.cu.
// The include guard is named after this file (not constants.h) so that it
// cannot collide with the guard of a separate constants header.
#ifndef KERNELS_CUH
#define KERNELS_CUH
#include "cuda_runtime.h"

// Kernel that prints the value of the device constant deviceConstVar.
// Takes no arguments.
__global__ void printConstantValue();
#endif // KERNELS_CUH
内核打印的是 0.000,而不是预期的 1.23。
看来 `__constant__` 变量的值没有传递到内核。
我使用 Visual Studio 进行构建,构建命令似乎是:
1>------ Rebuild All started: Project: CudaRuntime1, Configuration: Debug x64 ------
1>Compiling CUDA source file kernels.cu...
1>Compiling CUDA source file main.cu...
1>
1>C:\ShaloTide\CudaRuntime1>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\bin\nvcc.exe" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --use-local-env -ccbin "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.36.32532\bin\HostX64\x64" -x cu -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include" -G --keep-dir x64\Debug -maxrregcount=0 --machine 64 --compile -cudart static -g -DWIN32 -DWIN64 -D_DEBUG -D_CONSOLE -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /FS /Zi /RTC1 /MDd " -Xcompiler "/Fdx64\Debug\vc143.pdb" -o C:\ShaloTide\CudaRuntime1\x64\Debug\main.cu.obj "C:\ShaloTide\CudaRuntime1\main.cu"
1>
1>C:\ShaloTide\CudaRuntime1>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\bin\nvcc.exe" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --use-local-env -ccbin "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.36.32532\bin\HostX64\x64" -x cu -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include" -G --keep-dir x64\Debug -maxrregcount=0 --machine 64 --compile -cudart static -g -DWIN32 -DWIN64 -D_DEBUG -D_CONSOLE -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /FS /Zi /RTC1 /MDd " -Xcompiler "/Fdx64\Debug\vc143.pdb" -o C:\ShaloTide\CudaRuntime1\x64\Debug\kernels.cu.obj "C:\ShaloTide\CudaRuntime1\kernels.cu"
1>C:\ShaloTide\CudaRuntime1\kernels.cu(4): warning #20044-D: extern declaration of the entity deviceConstVar is treated as a static definition
1>
1>main.cu
1>kernels.cu
1>Done building project "CudaRuntime1.vcxproj".
1> Creating library C:\ShaloTide\CudaRuntime1\x64\Debug\CudaRuntime1.lib and object C:\ShaloTide\CudaRuntime1\x64\Debug\CudaRuntime1.exp
1>CudaRuntime1.vcxproj -> C:\ShaloTide\CudaRuntime1\x64\Debug\CudaRuntime1.exe
========== Rebuild All: 1 succeeded, 0 failed, 0 skipped ==========
========== Rebuild started at 8:33 PM and took 02.316 seconds ==========
以上构建输出是从 Visual Studio 复制的。
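感谢 @paleonix。代码本身没有问题,问题在于构建时需要启用设备端链接(可重定位设备代码)。未启用时,每个 .cu 文件的设备代码会被单独编译,kernels.cu 中的 `extern __constant__` 声明会被当作该文件内部的静态定义(即构建输出中的警告 #20044-D),因此内核读取的是它自己那份未被赋值的副本,而不是 main.cu 中通过 cudaMemcpyToSymbol 写入的变量。在 Visual Studio 中,在右侧的解决方案资源管理器里选中所有 .cu 文件,右键单击,依次进入“属性”->“CUDA C/C++”->“通用”->“生成可重定位设备代码”,将其从“否”改为“是 (-rdc=true)”。这应该可以解决问题。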