我想请教 Thrust 库在 C++ 类中的适用性问题。我正在尝试实现一个类对象,它接收三个顶点的 (x,y,z) 坐标(分别记为 ver1、ver2 和 ver3),把它们分配给一个三角形,并计算该三角形的面积和法向量。
但是,我不太明白如何创建一个以 Thrust 向量作为成员的类。
下面是我从文件中读取的顶点坐标;我想把它们传给一个类,由该类把它们分配给三角形。以下代码位于 main 函数中。
// dum holds the coordinates read from the file; the three vertices are sliced
// out of it below. ver3 takes everything from index 6 to the end, so dum is
// expected to hold exactly nine values -- TODO confirm against the file reader.
thrust::host_vector<double> dum(start, end);
// Initializing the vertex elements on the CPU: three consecutive values each.
thrust::host_vector<double> ver1(dum.begin(), dum.begin() + 3);
thrust::host_vector<double> ver2(dum.begin() + 3, dum.begin() + 6);
thrust::host_vector<double> ver3(dum.begin() + 6, dum.end());
// Copying the CPU vectors to GPU vectors.
thrust::device_vector<double> ver1_gpu = ver1;
thrust::device_vector<double> ver2_gpu = ver2;
thrust::device_vector<double> ver3_gpu = ver3;
// Hand the three device-side vertices to the triangle class.
triangle(ver1_gpu, ver2_gpu, ver3_gpu);
在 triangle 类中,我尝试把 3 个顶点各自初始化为含 3 个元素且全为零的向量,因为每个顶点都有 3 个坐标(x、y 和 z)。我还声明了面积(area)和法线(normal)变量。
// Question code, kept as posted: a single triangle whose vertices and edge
// vectors are each held in a thrust::device_vector<double>.
class triangle
{
// Vertex coordinate vectors, each written with a (3,0) size/fill initializer.
// NOTE(review): C++ does not accept a parenthesized initializer on a member
// declaration; braces or a constructor initializer list would be needed.
thrust::device_vector<double>v1(3,0);
thrust::device_vector<double>v2(3,0);
thrust::device_vector<double>v3(3,0);
// Edge vectors. Only E1 and E2 are assigned in the constructor below; E3 is
// declared but never written.
thrust::device_vector<double>E1(3,0);
thrust::device_vector<double>E2(3,0);
thrust::device_vector<double>E3(3,0);
// Scalar members. NOTE(review): `dummy` receives the result of cross(...) and
// `normal` a vector divided by its norm, so both look like they should be
// 3-component vectors rather than doubles.
double normal;
// NOTE(review): the declaration of `dummy` has no terminating semicolon.
double dummy
double area;
public:
// Stores the three vertices, forms the edges E1 = vert2 - vert1 and
// E2 = vert3 - vert1, then sets the area to norm(cross(E2, E1)) / 2 and the
// normal to cross(E2, E1) / norm(cross(E2, E1)).
// NOTE(review): `cross`, `norm`, `obj` and the unqualified `device_vector` are
// not declared anywhere in the visible snippet; members are addressed through
// the class name (`triangle.v1`) instead of `this`; `Area` / `Normal` differ in
// case from the declared `area` / `normal`; and the `%%` on the cross-product
// line is not C++ comment syntax.
__device__ __host__ triangle(device_vector<double>vert1, device_vector<double>vert2, device_vector<double>vert3)
{
triangle.v1 = vert1;
triangle.v2 = vert2;
triangle.v3 = vert3;
triangle.E1 = vert2 - vert1;
triangle.E2 = vert3 - vert1;
dummy = cross(obj.E2, obj.E1);%% Cross product
triangle.Area = norm(dummy) / 2;
triangle.Normal = dummy / norm(dummy);
}
};
我想在设备(GPU)上完成所有计算。我是 CUDA 及其库的新手,我知道自己在很多地方都写错了,但还是希望得到你们的帮助。
以下代码旨在展示:在一个利用 Thrust 向量把计算卸载到 GPU 的 C++ 类中,如何通过移动语义(并非 Thrust 特有),以及巧妙地使用 Thrust 的“花式迭代器”(thrust::transform_iterator 和 thrust::zip_iterator)进行初始化,来避免不必要的复制。由于我们的类应当同时处理大量三角形,以充分利用现代 GPU 的资源并摊销相关的开销,因此将它命名为 Triangles。为了让这类受带宽限制的应用在 GPU 上获得良好的性能,全局内存访问的“合并”(coalescing)是关键。实现这一点的一种直接方法是使用所谓的“数组的结构体”(Structure of Arrays,SoA)布局,例如
// Structure-of-arrays layout: one array of N doubles per coordinate, so the
// i-th point is presumably (x[i], y[i], z[i]). N is not defined in this
// snippet.
struct SoA_example {
    double x[N];  // all x coordinates
    double y[N];  // all y coordinates
    double z[N];  // all z coordinates
};
而不是“结构体的数组”(Array of Structures,AoS)布局,例如
// Array-of-structures element: the x, y and z coordinates of one vertex stored
// next to each other.
struct Vertex {
    double x;
    double y;
    double z;
};  // the terminating semicolon is required; without it the next declaration fails to parse
// Array-of-structures layout: N Vertex objects in one array (N is not defined
// in this snippet).
Vertex AoS_example[N];
这套术语与下面所用的 C++ 和 Thrust 容器的名称有些冲突,因为这里的“数组”是 thrust::device_vector,而 std::array 则被用作“结构体”。根据具体场景,除了下面给出的构造函数之外,还可以设想许多其他构造函数,例如负责把数据从主机传输到设备的、从文件读取数值的,或者处理 AoS 格式(主机端)输入的构造函数。
提问者的代码把 dummy 和 normal 定义为标量,这在数学上说不通:两个向量的叉积仍然是一个向量。我在这里对此做了更正。以下代码未经测试,但可以通过编译。
#include <thrust/device_vector.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/zip_function.h>
#include <array>
#include <cstddef>
#include <utility>
// Functor producing one scalar of the form a * b - c * d, i.e. a single
// component of a 3D cross product when fed the matching operand components.
template <typename T>
struct CrossProduct {
    // Returns a * b - c * d. Kept as a single expression so that the compiler
    // sees exactly the same arithmetic as before.
    __host__ __device__ auto operator()(const T a, const T b, const T c,
                                        const T d) const -> T {
        return a * b - c * d;
    }
};
// Functor returning half the Euclidean length of the vector (x, y, z). Applied
// to the cross product of two triangle edges this is the triangle's area.
template <typename T>
struct TriangleArea {
    __host__ __device__ auto operator()(const T x, const T y,
                                        const T z) const -> T {
        // Squared length first, then the square root, then the halving.
        const T squared_length = x * x + y * y + z * z;
        return sqrt(squared_length) / 2;
    }
};
// Functor that rescales the vector (val_x, val_y, val_z) in place by
// 0.5 / area. When `area` is half the length of that vector, the result has
// unit length.
//
// The call operator returns nothing: it only mutates its reference arguments.
// (It was previously declared to return T without any return statement, which
// is undefined behaviour once control flows off the end of the function.)
//
// Parameters:
//   val_x, val_y, val_z  components to scale, modified in place
//   area                 scale denominator; a zero area yields non-finite
//                        components because of the division below
template <typename T>
struct NormalizeUsingArea {
    __host__ __device__ void operator()(T &val_x, T &val_y, T &val_z,
                                        T area) const {
        // One division, three multiplications.
        const T norm = 0.5 / area;
        val_x *= norm;
        val_y *= norm;
        val_z *= norm;
    }
};
template <typename T>
class Triangles {
static constexpr int x_dim = 0;
static constexpr int y_dim = 1;
static constexpr int z_dim = 2;
static constexpr int n_dims = 3;
using Container = thrust::device_vector<T>;
using Vectors = std::array<Container, n_dims>;
std::ptrdiff_t n_triangles{};
Vectors v1;
Vectors v2;
Vectors v3;
Vectors E1;
Vectors E2;
// Vectors E3;
// Vectors dummies;
Vectors normals;
Container areas;
// helper functions
auto make_minus_iterator(Vectors &vs_1, Vectors &vs_2, int component) {
return thrust::make_transform_iterator(
thrust::make_zip_iterator(vs_1[component].cbegin(),
vs_2[component].cbegin()),
thrust::make_zip_function(thrust::minus<T>{}));
}
template <int component>
auto make_crossprod_iterator(Vectors &vs_1, Vectors &vs_2) {
static_assert(component >= x_dim && component < n_dims);
static constexpr int dim_1 = (component + 1) % n_dims;
static constexpr int dim_2 = (component + 2) % n_dims;
return thrust::make_transform_iterator(
thrust::make_zip_iterator(
vs_1[dim_1].cbegin(), vs_2[dim_2].cbegin(),
vs_1[dim_2].cbegin(), vs_2[dim_1].cbegin()),
thrust::make_zip_function(CrossProduct<T>{}));
}
auto make_area_iterator(Vectors &crossproduct) {
return thrust::make_transform_iterator(
thrust::make_zip_iterator(crossproduct[x_dim].cbegin(),
crossproduct[y_dim].cbegin(),
crossproduct[z_dim].cbegin()),
thrust::make_zip_function(TriangleArea<T>{}));
}
auto make_zip_iterator(Vectors &vecs, const Container &scalars) {
return thrust::make_zip_iterator(vecs[x_dim].begin(),
vecs[y_dim].begin(),
vecs[z_dim].begin(), scalars.cbegin());
}
public:
// The following constructor is just an example based on OPs constructor.
// Use fancy iterators to avoid unnecessary initialization to 0 of E1, E2,
// ... Depending on the use case it might make more sense to just have the
// iterators as members and compute E1, E2, etc on the fly when needed and
// get rid of their Vectors members (kernel fusion).
Triangles(
thrust::device_vector<T> &&vert1_x, thrust::device_vector<T> &&vert1_y,
thrust::device_vector<T> &&vert1_z, thrust::device_vector<T> &&vert2_x,
thrust::device_vector<T> &&vert2_y, thrust::device_vector<T> &&vert2_z,
thrust::device_vector<T> &&vert3_x, thrust::device_vector<T> &&vert3_y,
thrust::device_vector<T> &&vert3_z)
: n_triangles{static_cast<std::ptrdiff_t>(vert1_x.size())},
// move device_vectors with vertices into class (avoids expensive
// copies)
v1{std::move(vert1_x), std::move(vert1_y), std::move(vert1_z)},
v2{std::move(vert2_x), std::move(vert2_y), std::move(vert2_z)},
v3{std::move(vert3_x), std::move(vert3_y), std::move(vert3_z)},
// calculate diffs and initialize E1, E2 with them
E1{Container(make_minus_iterator(v2, v1, x_dim),
make_minus_iterator(v2, v1, x_dim) + n_triangles),
Container(make_minus_iterator(v2, v1, y_dim),
make_minus_iterator(v2, v1, y_dim) + n_triangles),
Container(make_minus_iterator(v2, v1, z_dim),
make_minus_iterator(v2, v1, z_dim) + n_triangles)},
E2{Container(make_minus_iterator(v3, v1, x_dim),
make_minus_iterator(v3, v1, x_dim) + n_triangles),
Container(make_minus_iterator(v3, v1, y_dim),
make_minus_iterator(v3, v1, y_dim) + n_triangles),
Container(make_minus_iterator(v3, v1, z_dim),
make_minus_iterator(v3, v1, z_dim) + n_triangles)},
// calculate cross-products and initialize normals with them(normalize
// later)
normals{
Container(make_crossprod_iterator<x_dim>(E2, E1),
make_crossprod_iterator<x_dim>(E2, E1) + n_triangles),
Container(make_crossprod_iterator<y_dim>(E2, E1),
make_crossprod_iterator<y_dim>(E2, E1) + n_triangles),
Container(make_crossprod_iterator<z_dim>(E2, E1),
make_crossprod_iterator<z_dim>(E2, E1) + n_triangles)},
// calculate areas and initialize with them
areas(make_area_iterator(normals),
make_area_iterator(normals) + n_triangles) {
// normalize normals
thrust::for_each_n(
make_zip_iterator(normals, areas), n_triangles,
thrust::make_zip_function(NormalizeUsingArea<double>{}));
}
};
// Explicit instantiation so that compilation errors inside the template show
// up when this file is compiled on its own (e.g. on Compiler Explorer,
// godbolt.org).
template class Triangles<double>;