我有一个非常简单的 CUDA 程序,但无法编译
这是
main.cpp
#include <iostream>
#include <cstdlib>
#include "/opt/cuda/targets/x86_64-linux/include/cuda_runtime.h"
#include "/opt/cuda/targets/x86_64-linux/include/cublas_v2.h"
// Function to initialize a matrix with random values
// Fills a rows x cols buffer of floats with pseudo-random values.
//
// matrix: destination buffer holding at least rows * cols floats.
// rows, cols: logical dimensions; only their product matters here.
//
// Each element becomes rand() divided by RAND_MAX, so the sequence depends
// on the current state of the C library generator (see srand).
void initMatrix(float* matrix, int rows, int cols) {
    const int total = rows * cols;
    int idx = 0;
    while (idx < total) {
        matrix[idx] = rand() / static_cast<float>(RAND_MAX);
        ++idx;
    }
}
// Function to print a matrix
// Writes a rows x cols matrix to std::cout, one matrix row per output line.
//
// matrix: buffer read as row-major, i.e. element (r, c) is matrix[r * cols + c].
// rows, cols: logical dimensions of the matrix.
//
// Every element is followed by a single space and every row is terminated
// with std::endl (newline plus flush).
void printMatrix(float* matrix, int rows, int cols) {
    int r = 0;
    while (r < rows) {
        const int rowStart = r * cols;
        int c = 0;
        while (c < cols) {
            std::cout << matrix[rowStart + c] << " ";
            ++c;
        }
        std::cout << std::endl;
        ++r;
    }
}
int main() {
// Matrix dimensions
int m = 3; // Number of rows
int n = 3; // Number of columns
// Allocate host memory for the matrix and vector
float* h_matrix = new float[m * n];
float* h_vector = new float[n];
// Initialize matrix and vector with random values
initMatrix(h_matrix, m, n);
initMatrix(h_vector, n, 1);
// Print the matrix and vector
std::cout << "Matrix:" << std::endl;
printMatrix(h_matrix, m, n);
std::cout << "\nVector:" << std::endl;
printMatrix(h_vector, n, 1);
// Allocate device memory for the matrix and vector
float* d_matrix;
float* d_vector;
cudaMalloc((void**)&d_matrix, m * n * sizeof(float));
cudaMalloc((void**)&d_vector, n * sizeof(float));
// Transfer the matrix and vector from host to device
cudaMemcpy(d_matrix, h_matrix, m * n * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_vector, h_vector, n * sizeof(float), cudaMemcpyHostToDevice);
// cuBLAS handle
cublasHandle_t handle;
cublasCreate(&handle);
// Perform matrix-vector multiplication
float alpha = 1.0; // Scaling factor
float beta = 0.0; // Scaling factor
cublasSgemv(handle, CUBLAS_OP_N, m, n, &alpha, d_matrix, m, d_vector, 1, &beta, d_vector, 1);
// Transfer the result back to the host
cudaMemcpy(h_vector, d_vector, m * sizeof(float), cudaMemcpyDeviceToHost);
// Print the result
std::cout << "\nResult:" << std::endl;
printMatrix(h_vector, m, 1);
// Clean up
delete[] h_matrix;
delete[] h_vector;
cudaFree(d_matrix);
cudaFree(d_vector);
cublasDestroy(handle);
return 0;
}
这是
CMakeLists.txt
:
# Build script for the cuBLAS matrix-vector example (single source: main.cpp).
cmake_minimum_required(VERSION 3.17)
# Enables both the C++ and the CUDA language; the only source listed below is
# main.cpp, which CMake compiles as C++.
project(CUBLAS_Matrix_Vector_Multiplication LANGUAGES CXX CUDA)
# Modern CUDA toolkit lookup; it provides imported targets such as
# CUDA::cudart and CUDA::cublas.
find_package(CUDAToolkit REQUIRED)
# Legacy FindCUDA module, requested in addition to CUDAToolkit above.
# NOTE(review): the CUDA_* variables set below belong to this legacy module and
# presumably only affect its cuda_add_executable()-style macros, which this
# file never calls -- confirm before relying on them.
find_package(CUDA REQUIRED)
list(APPEND CUDA_NVCC_FLAGS "-std=c++11")
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
# Passes -std=c++11 through add_definitions, i.e. as a compile flag for all
# targets in this directory.
add_definitions(-std=c++11)
set(CUDA_SEPARABLE_COMPILATION ON)
add_executable(matrix_vector_multiplication main.cpp)
target_include_directories(matrix_vector_multiplication PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
# NOTE(review): these are CUDA-language properties; with main.cpp as the only
# source they presumably have no effect on compilation -- confirm.
set_target_properties(matrix_vector_multiplication PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_STANDARD 11)
# NOTE(review): CUDAToolkit_LIBRARIES is not among the result variables
# documented for FindCUDAToolkit, so this most likely expands to nothing and no
# CUDA library is linked. That matches the "undefined reference" errors for
# cudaMalloc / cublasCreate_v2; the imported targets CUDA::cudart and
# CUDA::cublas are the documented way to link.
target_link_libraries(matrix_vector_multiplication PRIVATE ${CUDAToolkit_LIBRARIES})
我尝试先用
cmake
生成构建文件,然后运行 make 来编译它。但 make
在链接阶段报出了很多错误:
/usr/bin/ld: CMakeFiles/matrix_vector_multiplication.dir/main.cpp.o: in function `main':
main.cpp:(.text+0x8056): undefined reference to `cudaMalloc'
/usr/bin/ld: main.cpp:(.text+0x8072): undefined reference to `cudaMalloc'
/usr/bin/ld: main.cpp:(.text+0x8098): undefined reference to `cudaMemcpy'
/usr/bin/ld: main.cpp:(.text+0x80ba): undefined reference to `cudaMemcpy'
/usr/bin/ld: main.cpp:(.text+0x80c6): undefined reference to `cublasCreate_v2'
/usr/bin/ld: main.cpp:(.text+0x8112): undefined reference to `cublasSgemv_v2'
/usr/bin/ld: main.cpp:(.text+0x8138): undefined reference to `cudaMemcpy'
/usr/bin/ld: main.cpp:(.text+0x81c6): undefined reference to `cudaFree'
/usr/bin/ld: main.cpp:(.text+0x81d2): undefined reference to `cudaFree'
/usr/bin/ld: main.cpp:(.text+0x81de): undefined reference to `cublasDestroy_v2'
collect2: Fehler: ld gab 1 als Ende-Status zurück
make[2]: *** [CMakeFiles/matrix_vector_multiplication.dir/build.make:97: matrix_vector_multiplication] Fehler 1
make[1]: *** [CMakeFiles/Makefile2:83: CMakeFiles/matrix_vector_multiplication.dir/all] Fehler 2
make: *** [Makefile:91: all] Fehler 2
尝试解决
我尝试参照另一个类似问题中的做法,直接用 g++ 编译:
g++ main.cpp -o main -L/opt/cuda/lib64/ -lcudart -lcuda -L/opt/cuda
这会引发错误:
In file included from /opt/cuda/targets/x86_64-linux/include/cublas_v2.h:69,
from main.cpp:4:
/opt/cuda/targets/x86_64-linux/include/cublas_api.h:77:10: fatal error: cuda_fp16.h: No such file or directory
77 | #include <cuda_fp16.h>
| ^~~~~~~~~~~~~
compilation terminated.
如何解决这个问题?
nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Fri_Sep__8_19:17:24_PDT_2023
Cuda compilation tools, release 12.3, V12.3.52
Build cuda_12.3.r12.3/compiler.33281558_0
下面这个 CMakeLists.txt 在我这里可以正常工作:
# Minimal build script: main.cpp is plain C++ that only calls the CUDA runtime
# and cuBLAS APIs, so the CXX language alone is enabled.
cmake_minimum_required(VERSION 3.17)
project(CUBLAS_Matrix_Vector_Multiplication LANGUAGES CXX)

# Locates the installed CUDA toolkit and defines the CUDA::* imported targets.
find_package(CUDAToolkit REQUIRED)

add_executable(matrix_vector_multiplication main.cpp)

# Require at least C++11 for this target.
target_compile_features(matrix_vector_multiplication PRIVATE cxx_std_11)

# Linking the imported targets also supplies the toolkit include directories,
# which is why <cuda_runtime.h> and <cublas_v2.h> resolve without absolute paths.
target_link_libraries(matrix_vector_multiplication PRIVATE CUDA::cublas CUDA::cudart)
前提是把 main.cpp 中的 include 改为不带绝对路径的形式:
#include <cuda_runtime.h>
#include <cublas_v2.h>