我正在尝试使用并行累加器进行循环展开,但我对依赖项和计算感到困惑。 result 和 result2 理想情况下应该彼此独立运行,以便利用 CPU 微架构流水线设计,使它们可以并行执行。这意味着不应使用以下代码:
for (i = degree - 1; i >= 1; i-=2)
{
result = a[i] + x * result;
result2 = a[i-1] + x * result; //same dependency
}
原函数:
double polyh(double a[], double x, long degree)
{
long i;
double result = a[degree];
for (i = degree - 1; i >= 0; i--)
{
result = a[i] + x * result;
}
return result;
}
进入:
double poly_opt(double a[], double x, long degree)
{
long i;
double result = a[degree];
double result_2 = 0;
double result_array[2] = {result, result_2};
double xpwr_1 = x; // 1
double xpwr_2 = x * x; // 2
double xpwr_array[2] = {xpwr_1, xpwr_2};
for (i = degree - 1; i >= 1; i -= 2)
{
result = a[i] + xpwr_1 * result;
result_2 = a[i - 1] + xpwr_2 * result_2;
xpwr_1 = xpwr_1 * x * x;
xpwr_2 = xpwr_2 * x * x;
}
// leftover when input not multiple of loop unrolling factor
for (; i >= 0; --i)
{
result = a[i] + xpwr_1 * result;
xpwr_1 = x * xpwr_1;
}
return result * result_2;
}
我正在尝试引入第二个变量 result2 来执行 2 的循环展开,但由于上面的代码不起作用,我无法获得结果。
这是一次不使用并行累加器和 8 的循环展开因子的尝试,但我想引入新变量以期加快速度:
double poly_opt(double a[], double x, long degree)
{
long i;
double result = a[degree];
for (i = degree - 1; i >= 8; i -= 9)
{
result = a[i - 8] + (a[i - 7] + (a[i - 6] + (a[i - 5] + (a[i - 4] + (a[i - 3] + ((a[i - 2] + (a[i - 1] + (a[i] + x * result) * x) * x) * x)) * x) * x) * x) * x) * x;
}
// leftover when input not multiple of loop unrolling factor
for (; i >= 0; --i)
{
result = a[i] + x * result;
}
return result;
}
我正在使用这些数据集进行测试:
double a[] = {1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0};
double x = 0.5;
long degree = 359;
int main()
{
std::cout << polyh(a, x, degree) << std::endl;
return 0;
}
正确的输出应该是:3.1428571429