// Illustrative example of the desired operation.
// input:
int array[10] = {1,9,9,6,0,1,1,6,1,4};
int set_num = -1; // value to write into the selected positions
int index[4] = {0,2,4,6}; // positions of array that should be overwritten
// output: every position listed in index now holds set_num; all other
// elements keep their input value.
array[10] = {-1,9,-1,6,-1,1,-1,6,1,4};
#include <cstdlib>
#include <iostream>
#include <cstdio>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/copy.h>
#include <thrust/sequence.h>
#include <thrust/functional.h>
// Scatter-assign kernel: for every position i in [0, num_index), writes
// set_num into arrays[index[i]].
//
// Launch layout: 1D grid of 1D blocks (only the .x components are used).
// Each thread handles at most one entry of index, so the launch must provide
// at least num_index threads in total; surplus threads exit without touching
// memory.
//
// Parameters:
//   arrays    - destination buffer, dereferenced on the device.
//   index     - num_index positions into arrays, dereferenced on the device.
//               The kernel does not validate these positions against the
//               length of arrays; the caller must keep them in range.
//   set_num   - value stored at each selected position.
//   num_index - number of valid entries in index; values <= 0 make the
//               kernel a no-op.
template <typename T>
__global__ void setNumInArray(T *arrays, int *index, T set_num, int num_index)
{
    // Reject non-positive counts before the unsigned comparison below: a
    // negative int would otherwise convert to a huge unsigned value and let
    // every thread through.
    if (num_index <= 0)
        return;

    unsigned int tid = threadIdx.x + blockDim.x * blockIdx.x;

    // Valid entries are 0 .. num_index - 1, so tid == num_index must be
    // rejected as well (the previous `>` test let that thread read one
    // element past the end of index).
    if (tid >= static_cast<unsigned int>(num_index))
        return;

    arrays[index[tid]] = set_num;
}
// Demo driver for setNumInArray: builds a device vector holding 1, 2, 3, ...,
// overwrites the positions 64, 74, 84, ... (num_index of them) with set_num
// via the hand-written kernel, then prints every element as
// "<1-based position>: <value>".
// Returns EXIT_FAILURE if the kernel launch or its execution reports an error.
int main()
{
    const unsigned int vec_length = 4069; // length of the array
    thrust::device_vector<int> vec(vec_length);
    // Fill with consecutive values starting at 1.
    thrust::sequence(thrust::device, vec.begin(), vec.end(), 1);

    const int num_index = 16; // length of the index array
    thrust::device_vector<int> index(num_index);
    // Positions to overwrite: start at 64, step 10.
    thrust::sequence(thrust::device, index.begin(), index.end(), 64, 10);

    // raw_pointer_cast on device_vector<int>::data() already yields int*, so
    // no additional cast is required.
    int *d_vec = thrust::raw_pointer_cast(vec.data());
    int *d_index = thrust::raw_pointer_cast(index.data());

    int set_num = 0; // the value to set

    const int block_size = 64;
    // Ceil-divide so every index entry gets its own thread even when
    // num_index exceeds a single block.
    const int grid_size = (num_index + block_size - 1) / block_size;
    setNumInArray<int><<<grid_size, block_size>>>(d_vec, d_index, set_num, num_index);

    // A launch does not return a status itself: configuration errors are
    // reported by cudaGetLastError, execution faults by the next sync.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "setNumInArray launch failed: %s\n", cudaGetErrorString(err));
        return EXIT_FAILURE;
    }
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "setNumInArray execution failed: %s\n", cudaGetErrorString(err));
        return EXIT_FAILURE;
    }

    // Copy the result back to the host and print it for inspection.
    thrust::host_vector<int> h_vec = vec;
    for (int ii = 0; ii < static_cast<int>(vec_length); ii++) {
        printf("%d: %d\n", ii + 1, h_vec[ii]);
    }
    return 0;
}
该程序可以输出正确的结果。但是,我想避免在 Thrust 容器和普通设备指针之间进行类型转换(我在项目的其他部分大量使用了 Thrust 函数),同时也想避免为了获得更好的性能而手动调整 block/grid 大小。那么如何使用 Thrust 来实现它?
您可以将 thrust::for_each 与 lambda 函数结合使用(设备端 lambda 需要使用 nvcc 的 --extended-lambda 选项进行编译)。不过我还没有对此做过基准测试,也许您的内核更快。
// Thrust-only variant of the scatter-assign demo: instead of launching a
// custom kernel, thrust::for_each visits every entry of the position list and
// a device lambda stores set_num at that position. Afterwards only the
// elements equal to set_num are printed as "<1-based position>: <value>".
// The device lambda requires nvcc's extended-lambda support.
int main()
{
    const unsigned int vec_length = 4069;
    const int num_index = 16;
    int set_num = 0;

    // Data to modify: consecutive values starting at 1.
    thrust::device_vector<int> values(vec_length);
    thrust::sequence(thrust::device, values.begin(), values.end(), 1);

    // Positions to overwrite: start at 64, step 10.
    thrust::device_vector<int> targets(num_index);
    thrust::sequence(thrust::device, targets.begin(), targets.end(), 64, 10);

    // The lambda captures the raw device pointer and set_num by value.
    int *raw_values = thrust::raw_pointer_cast(values.data());
    auto assign_at = [=] __device__(int pos) { raw_values[pos] = set_num; };
    thrust::for_each(thrust::device, targets.begin(), targets.end(), assign_at);

    // Bring the result back to the host and report the overwritten slots.
    thrust::host_vector<int> host_values = values;
    for (int pos = 0; pos < vec_length; ++pos) {
        if (host_values[pos] != set_num)
            continue;
        printf("%d: %d\n", pos + 1, host_values[pos]);
    }
    return 0;
}