我对CUDA相当陌生,我正在尝试在结构上应用reduce_by_key
操作。
// Record pairing an integer index with a 3-component integer location.
struct index_and_loc {
    int index;      // value shared by records that belong together
    int3 location;  // x/y/z integer triple
};  // a struct definition must be terminated with ';'
我想做的是,我有一个index_and_loc
的向量,其中元素将具有相同的index
// Illustrative input data: three records with index 0 followed by two records with index 1.
thrust::host_vector<index_and_loc> my_vector;
my_vector= { {0, {0, 2, 5}},
{0, {1, 3, 4}},
{0, {0, 1, 3}},
{1, {2, 1, 0}},
{1, {2, 2, 2}}
}
并且我想有两个输出,一个是具有相同索引的所有元素的总和,另一个是存储每个索引有多少个实例的向量。所以我的输出结果是:
// Sum all elements with index 0 and 1
sum_vector = {{0, {1, 6, 12}},
{1, {4, 3, 2}}
}
instances_vector = {3, // Number of elements with index = 0
2} // Number of elements with index = 1
阅读Thrust文档中的方法后,我认为我应该使用reduce_by_key
。我可以将input1
声明为my_vector
,将input2
声明为元素全为1的向量,并且长度与input1
相同,然后以结构的index为键进行归约,并将thrust::plus<int>
用作我的BinaryFunction。但是,这样无法对我的input1向量中的int3
元素求和,因为BinaryFunction只应用于input2
。
有什么方法可以做到这一点?如果我的问题不太清楚,请告诉我。
编辑:
我重新设计了问题,并将其归结为更简单的方法。相反,我在my_vector
中添加了一个instance列并将其设置为1。现在,按照标准思路,我可以使用reduce_by_key,
将my_vector
同时用作键和值,对其中的int3元素和instance元素分别求和,从而获得我需要的两个结果。我已经重新创建了一个工作示例
int3
但是我收到此错误:
instance
似乎我没有使用
#include <thrust/extrema.h>
#include <thrust/reduce.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
// Record carrying an integer index, a 3-component integer location and an
// integer instance counter. Both constructors are callable from host and device code.
struct index_and_loc {
    int index;
    int3 location;
    int instance;

    // Default constructor: deliberately leaves every member uninitialized.
    __host__ __device__
    index_and_loc() {}

    // Builds a record from the given index, location and instance count.
    __host__ __device__
    index_and_loc(int index_, int3 location_, int instance_)
        : index(index_),
          location(location_),
          instance(instance_)
    {}
};
// Binary predicate: two records are considered equal when their `index`
// members match; `location` and `instance` are ignored.
struct equal_index {
    __host__ __device__
    bool operator()(const index_and_loc &a, const index_and_loc &b) const {
        const bool same_index = (a.index == b.index);
        return same_index;
    }
};
// Binary reduction functor: returns a record whose location is the
// component-wise sum of both operands' locations and whose instance is the
// sum of both instance counts. The index is taken from the left operand.
struct sum_points {
    __host__ __device__
    index_and_loc operator()(const index_and_loc &lhs, const index_and_loc &rhs) const {
        // Start from the left location and accumulate the right one into it.
        int3 total = lhs.location;
        total.x += rhs.location.x;
        total.y += rhs.location.y;
        total.z += rhs.location.z;

        const int combined_instances = lhs.instance + rhs.instance;
        return index_and_loc(lhs.index, total, combined_instances);
    }
};
// Driver from the question: fills a host vector with five records and tries to
// reduce them by index with reduce_by_key, using the same vector as keys and values.
int main() {
int num_points = 5;
// NOTE(review): the vector is constructed with num_points elements and the five
// push_back calls below append to it, so it appears to end up with 10 entries,
// the first five default-constructed (uninitialized) — confirm this is intended.
thrust::host_vector<index_and_loc> my_vector(num_points);
my_vector.push_back(index_and_loc(0, {0, 2, 5}, 1));
my_vector.push_back(index_and_loc(0, {1, 3, 4}, 1));
my_vector.push_back(index_and_loc(0, {0, 1, 3}, 1));
my_vector.push_back(index_and_loc(1, {2, 1, 0}, 1));
my_vector.push_back(index_and_loc(1, {2, 2, 2}, 1));
// NOTE(review): declared as a pair of raw pointers, but the call below is given
// host_vector iterators as outputs; the compiler diagnostic quoted in the
// surrounding text reports the failed normal_iterator -> pointer conversion.
thrust::pair<index_and_loc*, index_and_loc*> sum_count;
// Output buffers for the reduced keys and the reduced values.
thrust::host_vector<index_and_loc> same_idx(num_points);
thrust::host_vector<index_and_loc> sum_locs(num_points);
// NOTE(review): the thrust::device policy is combined with host_vector iterators
// here — verify that the execution policy matches where the data lives.
sum_count = thrust::reduce_by_key(thrust::device, my_vector.begin(), my_vector.end(),
my_vector.begin(), same_idx.begin(), sum_locs.begin(), equal_index(), sum_points());
}
的适当构造函数,而且我不确定如何执行。
有很多方法可以实现您想做的事。我不清楚哪种方法高效、哪种方法低效,这需要您自己判断。
一种方法是在您的类中定义必要的比较和加法运算符,然后把它们包装在Thrust预定义的二元仿函数中,分别用作所需的二元运算符和谓词,并用于
/opt/cuda/bin/..//include/thrust/detail/pair.inl(48): error: no suitable conversion function from "const thrust::detail::normal_iterator<index_and_loc *>" to "index_and_loc *" exists
detected during instantiation of "thrust::pair<T1, T2>::pair(const thrust::pair<U1, U2> &) [with T1=index_and_loc *, T2=index_and_loc *, U1=thrust::detail::normal_iterator<index_and_loc *>, U2=thrust::detail::normal_iterator<index_and_loc *>]"
test2.cu(49): here
/opt/cuda/bin/..//include/thrust/detail/pair.inl(48): error: no suitable conversion function from "const thrust::detail::normal_iterator<index_and_loc *>" to "index_and_loc *" exists
detected during instantiation of "thrust::pair<T1, T2>::pair(const thrust::pair<U1, U2> &) [with T1=index_and_loc *, T2=index_and_loc *, U1=thrust::detail::normal_iterator<index_and_loc *>, U2=thrust::detail::normal_iterator<index_and_loc *>]"
test2.cu(49): here
2 errors detected in the compilation of "/tmp/tmpxft_00005985_00000000-8_test2.cpp1.ii".
调用。例如:
thrust::detail::normal_iterator<>
它们的关键是谓词reduce_by_key
和归约
#include <thrust/functional.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <iostream>
// Record pairing an integer index with a 3-component integer location.
// Equality looks only at `index`; addition sums the locations component-wise
// and keeps the left operand's index, so the type can be used with
// thrust::equal_to and thrust::plus.
struct index_and_loc {
    int index;
    int3 location;

    // Trivial default constructor; members are left uninitialized.
    index_and_loc() = default;

    // Builds a record from an index and a location.
    __host__ __device__
    index_and_loc(int index_, int3 location_)
        : index(index_), location(location_) {}

    // Copy assignment usable from both host and device code.
    __host__ __device__
    index_and_loc& operator=(const index_and_loc& y) {
        index = y.index;
        location = y.location;
        return *this;
    }

    // Two records compare equal when their indices match (location is ignored).
    __host__ __device__
    bool operator==(const index_and_loc& y) const {
        return y.index == index;
    }

    // Component-wise sum of the two locations; the result carries this->index.
    __host__ __device__
    index_and_loc operator+(const index_and_loc& y) const {
        int3 total = location;
        total.x += y.location.x;
        total.y += y.location.y;
        total.z += y.location.z;
        return index_and_loc(index, total);
    }
};
// Demo driver: copies five index_and_loc records to the device, reduces them by
// index with thrust::reduce_by_key, and prints one line per distinct index in
// the form "<index> {<sum x> <sum y> <sum z>}".
// Returns 0 on completion.
int main()
{
    // Host-side input. Records that share an index are stored next to each
    // other, which is what reduce_by_key needs in order to merge them.
    thrust::host_vector<index_and_loc> my_vector(5);
    my_vector[0] = {0, {0, 2, 5}};
    my_vector[1] = {0, {1, 3, 4}};
    my_vector[2] = {0, {0, 1, 3}};
    my_vector[3] = {1, {2, 1, 0}};
    my_vector[4] = {1, {2, 2, 2}};

    thrust::device_vector<index_and_loc> d_vector = my_vector;

    // In the worst case every record has its own index, so the outputs are
    // sized like the input instead of using a hard-coded length.
    thrust::device_vector<index_and_loc> keys_out(d_vector.size());
    thrust::device_vector<index_and_loc> data_out(d_vector.size());

    // index_and_loc::operator== and operator+ supply the key comparison and
    // the reduction through the stock Thrust functors.
    thrust::equal_to<index_and_loc> pred;
    thrust::plus<index_and_loc> op;

    // The same range serves as both keys and values.
    auto res = thrust::reduce_by_key(
        d_vector.begin(),
        d_vector.end(),
        d_vector.begin(),
        keys_out.begin(),
        data_out.begin(),
        pred,
        op);

    // res.first is the end of the keys actually written, so the number of
    // groups comes from the result rather than from a hard-coded constant.
    const int num_groups = static_cast<int>(res.first - keys_out.begin());

    for (int i = 0; i < num_groups; ++i) {
        // Element access on a device_vector copies the value back to the host.
        index_and_loc y = keys_out[i];
        index_and_loc x = data_out[i];
        std::cout << y.index << " {" << x.location.x
                  << " " << x.location.y
                  << " " << x.location.z
                  << "}" << '\n';
    }
    return 0;
}
的正确定义。其余部分只是拷贝赋值和构造所需要的。
这似乎可以满足您的要求:
operator==
注意,因为Thrust要求键和数据使用各自独立的迭代器,所以归约后的键和数据也必须有各自独立的输出迭代器。这意味着您实际上会得到解的两个副本。这是否构成问题由您自己决定。