我怎么能为这个函数做循环展开? [关闭]

问题描述 投票:0回答:0

我正在尝试使用并行累加器进行循环展开,但我对依赖项和计算感到困惑。 result 和 result2 理想情况下应该彼此独立运行,以便利用 CPU 微架构流水线设计,使它们可以并行执行。这意味着不应使用以下代码:

for (i = degree - 1; i >= 1; i-=2)
{
    result = a[i] + x * result;
    result2 = a[i-1] + x * result; //same dependency
}

原函数:

double polyh(double a[], double x, long degree)
{
    long i;
    double result = a[degree];
    for (i = degree - 1; i >= 0; i--)
    {
        result = a[i] + x * result;
    }
    return result;
}

进入:

double poly_opt(double a[], double x, long degree)
{
    long i;
    double result = a[degree];
    double result_2 = 0;
    double result_array[2] = {result, result_2};

    double xpwr_1 = x;     // 1
    double xpwr_2 = x * x; // 2
    double xpwr_array[2] = {xpwr_1, xpwr_2};
    for (i = degree - 1; i >= 1; i -= 2)
    {
        result = a[i] + xpwr_1 * result;
        result_2 = a[i - 1] + xpwr_2 * result_2;

        xpwr_1 = xpwr_1 * x * x;
        xpwr_2 = xpwr_2 * x * x;
    }

    // leftover when input not multiple of loop unrolling factor
    for (; i >= 0; --i)
    {
        result = a[i] + xpwr_1 * result;
        xpwr_1 = x * xpwr_1;
    }
    return result * result_2;
}

我正在尝试引入第二个变量 result2 来执行 2 的循环展开,但由于上面的代码不起作用,我无法获得结果。

这是一次不使用并行累加器和 8 的循环展开因子的尝试,但我想引入新变量以期加快速度:

double poly_opt(double a[], double x, long degree)
{
    long i;
    double result = a[degree];
    for (i = degree - 1; i >= 8; i -= 9)
    {
        result = a[i - 8] + (a[i - 7] + (a[i - 6] + (a[i - 5] + (a[i - 4] + (a[i - 3] + ((a[i - 2] + (a[i - 1] + (a[i] + x * result) * x) * x) * x)) * x) * x) * x) * x) * x;
    }

    // leftover when input not multiple of loop unrolling factor
    for (; i >= 0; --i)
    {
        result = a[i] + x * result;
    }

    return result;
}

我正在使用这些数据集进行测试:

double a[] = {1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0};
double x = 0.5;
long degree = 359;

int main()
{
    std::cout << polyh(a, x, degree) << std::endl;
    return 0;
}

正确的输出应该是:3.1428571429

c++ loops optimization micro-optimization
© www.soinside.com 2019 - 2024. All rights reserved.