快速准确的atan/arctan近似算法

Question

有什么快速准确的 atan/arctan 近似函数/算法吗？输入：x = (0, 1]（最小 x 要求）。输出：以弧度为单位

double FastArcTan(double x)
{
    return M_PI_4*x - x*(fabs(x) - 1)*(0.2447 + 0.0663*fabs(x));
}

我在网上找到了这个函数，但它给出的最大误差为 1.6 弧度，这太大了。

Answer 1

OP 报告的最大误差为 1.6 弧度（92 度），这与下面测试 OP 的代码不一致，最大误差输入

：0 到 1 范围约为 0.0015 弧度。我怀疑代码错误或测试超出了 0...1 的范围。 OP 是指 1.6 毫弧度吗？

也许更准确的

a*x^5 + b*x^3 + c*x

对于OP来说仍然足够快。在我的机器上，平均准确率提高了 4 倍，最坏情况下准确率提高了 2 倍。它使用 @Foon 和 @Matthew Pope

建议的最佳三项多项式

#include <math.h>
#include <stdio.h>

#ifndef M_PI_4
#define M_PI_4 (3.1415926535897932384626433832795/4.0)
#endif

double FastArcTan(double x) {
  return M_PI_4*x - x*(fabs(x) - 1)*(0.2447 + 0.0663*fabs(x));
}

#define A 0.0776509570923569
#define B -0.287434475393028
#define C (M_PI_4 - A - B)
#define FMT "% 16.8f"

double Fast2ArcTan(double x) {
  double xx = x * x;
  return ((A*xx + B)*xx + C)*x;
}

int main() {
  double mxe1 = 0, mxe2 = 0;
  double err1 = 0, err2 = 0;
  int n = 100;
  for (int i=-n;i<=n; i++) {
    double x = 1.0*i/n;
    double y = atan(x);
    double y_fast1 = FastArcTan(x);
    double y_fast2 = Fast2ArcTan(x);
    printf("%3d x:% .3f y:" FMT "y1:" FMT "y2:" FMT "\n", i, x, y, y_fast1, y_fast2);
    if (fabs(y_fast1 - y) > mxe1 ) mxe1  = fabs(y_fast1 - y);
    if (fabs(y_fast2 - y) > mxe2 ) mxe2  = fabs(y_fast2 - y);
    err1 += (y_fast1 - y)*(y_fast1 - y);
    err2 += (y_fast2 - y)*(y_fast2 - y);
  }
  printf("max error1: " FMT "sum sq1:" FMT "\n", mxe1, err1);
  printf("max error2: " FMT "sum sq2:" FMT "\n", mxe2, err2);
}

输出

 ...
 96 x: 0.960 y:      0.76499283y1:      0.76582280y2:      0.76438526
 97 x: 0.970 y:      0.77017091y1:      0.77082844y2:      0.76967407
 98 x: 0.980 y:      0.77529750y1:      0.77575981y2:      0.77493733
 99 x: 0.990 y:      0.78037308y1:      0.78061652y2:      0.78017777
100 x: 1.000 y:      0.78539816y1:      0.78539816y2:      0.78539816
max error1:       0.00150847sum sq1:      0.00023062
max error2:       0.00084283sum sq2:      0.00004826

不清楚为什么 OP 的代码使用

fabs()

给出“输入：x = (0, 1]”。

Answer 2

我根据 Pade-Chebyshev 近似开发了以下函数来满足我的需求。该代码是在 Visual Studio (x86) 下编写的，适用于整个源数据范围。该函数的准确度仅略低于标准 atanf 函数，但速度明显快于标准 atanf 函数。

// Fast arctangent with single precission
_declspec(naked) float _vectorcall arctg(float x)
{
  // When |x|<=1 following formula is used:
  //
  //                       a0 + a0*a1*x^2 + a0*a2*x^4 + a0*a3*x^6
  // arctg x = x * ------------------------------------------------------- = x*P(x)/Q(x)
  //                a0 + a0*(a1+b0)*x^2 + a0*(a2+b1)*x^4 + a0*(a3+b2)*x^6
  //
  // The a0 constant is reduced, but it is needed to less the error
  // and prevent overflow at large |x|.
  // P(x) and Q(x) are 3th degree polinomials of x^2.
  // When 1<|x|<62919776 (approx.) used formula is
  //
  //                                     pi/2*|x|*x^6*Q(1/x)-x^6*P(1/x)
  // arctg x = pi/2*sgn(x)-arctg(1/x) = --------------------------------
  //                                             x*x^6*Q(1/x)
  //
  // Here x^6*P(1/x) and x^6*Q(1/x) are 3th degree polinomials of x^2 too.
  // When |x|>=62919776 used formula is arctg x = pi/2*sgn(x)
  // To improve accuracy at |x|>1 the constant pi/2 is replaced by the sum (pi-3)/2 and 3/2.
  //
  static const float ct[14] =   // Constants table
  {
    6.28740248E-17f,            // a0*(a1+b0)=a0*c1
    4.86816205E-17f,            // a0*a1
    2.24874633E-18f,            // a0*(a3+b2)=a0*c3
    4.02179944E-19f,            // a0*a3
    4.25772129E-17f,            // a0
    4.25772129E-17f,            // a0
    2.50182216E-17f,            // a0*(a2+b1)=a0*c2
    1.25756219E-17f,            // a0*a2
    0.0707963258f,              // (pi-3)/2
    1.5f,                       // 3/2
    1.0f,                       // 1
    -0.0707963258f,             // -(pi-3)/2
    -1.5f,                      // -3/2
    3.95889818E15f              // Threshold of x^2 when arctg(x)=pi/2*sgn(x)
  };
  _asm
  {
    vshufps xmm1,xmm0,xmm0,0    // xmm1 = x # x : x # x
    mov edx,offset ct           // edx contains the address of constants table
    vmulps xmm2,xmm1,xmm1       // xmm2 = x^2 # x^2 : x^2 # x^2
    vmovups xmm3,[edx+16]       // xmm3 = a0*a2 # a0*c2 : a0 # a0
    vmulps xmm4,xmm2,xmm2       // xmm4 = y^2 # y^2 : y^2 # y^2
    vucomiss xmm2,[edx+40]      // Compare y=x^2 to 1
    ja arctg_big                // Jump if |x|>1
    vfmadd231ps xmm3,xmm2,[edx] // xmm3 ~ a3*y+a2 # c3*y+c2 : a1*y+1 # c1*y+1
    vmovhlps xmm1,xmm1,xmm3     // xmm1 ~ a3*y+a2 # c3*y+c2
    vfmadd231ps xmm3,xmm4,xmm1  // xmm3 ~ a3*y^3+a2*y^2+a1*y+1 # c3*y^3+c2*y^2+c1*y+1
    vmovshdup xmm2,xmm3         // xmm2 = P; xmm3 = Q
    vdivss xmm2,xmm2,xmm3       // xmm2 = P/Q
    vmulss xmm0,xmm0,xmm2       // xmm0 = x*P/Q = arctg(x)
    ret                         // Return
      arctg_big:                // When |x|>1 use formula pi/2*sgn(x)-arctg(1/x)
    vfmadd213ps xmm3,xmm2,[edx] // xmm3 ~ a2*y+a3 # c2*y+c3 : y+a1 # y+c1
    vmovmskpd eax,xmm1          // eax=3, if x<0, otherwise eax=0
    vmovhlps xmm0,xmm0,xmm3     // xmm0 ~ a2*y+a3 # c2*y+c3
    vfmadd213ps xmm3,xmm4,xmm0  // xmm3 ~ y^3+a1*y^2+a2*y+a3 # y^3+c1*y^2+c2*y+c3
    vmovss xmm0,[edx+4*eax+32]  // xmm0 = (pi-3)/2*sgn(x)
    vucomiss xmm2,[edx+52]      // Compare y=x^2 to threshold value
    jnb arctg_end               // The data is already in xmm0, if |x|>=62919776
    vmovshdup xmm4,xmm3         // xmm4 = P; xmm3 = Q
    vmulss xmm1,xmm1,xmm3       // xmm1 = x*Q
    vfmsub132ss xmm0,xmm4,xmm1  // xmm0 = (pi-3)/2*|x|*Q-P
    vdivss xmm0,xmm0,xmm1       // xmm0 = (pi-3)/2*sgn(x)-P/(x*Q)
      arctg_end:                // Add to result 3/2*sgn(x)
    vaddss xmm0,xmm0,[edx+4*eax+36] // xmm0 = pi/2*sgn(x)-P/(x*Q)
    ret                         // Return
  }
}

Answer 3

反正切似乎可以很好地近似为

z=x/(x+pi/3)

空间中的多项式。以下系数在您的区间上产生的最大误差为

4.07e-9

。

z0 = 0.0;
z1 = 1.0471985201240248;
z2 = 1.047102380719068;
z3 = 0.6678082665364844;
z4 = -0.16312555173050677;
z5 = -0.33989938855441837;
z6 = -5.8773286456840355;
z7 = 17.33961311116544;
z8 = -49.09950369422959;
z9 = 82.10190353000648;
z10 = -60.11210171537528;
z11 = 14.183376881192657;

您还可以使用高阶多项式（第 19 次）深入了解

1.23e-14

。

这里有一些 C# 代码（参考 Accord.net），用于查看精度和系数数量之间的权衡。

using Accord.Math;
using Accord.Statistics.Models.Regression.Linear;
using System.Management;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

foreach(var degree in new[] { 3, 5, 7, 11, 15, 19, 23, 27 })
{
    var N = 5000;
    var ols = new OrdinaryLeastSquares() { UseIntercept = false };

    var inp = new double[N][];
    var outputs = new double[N];

    for (int i = 0; i < N; ++i)
    {
        double x = (double)i / (double)N;

        inp[i] = new double[degree];
        inp[i][0] = x / (Math.PI /3 + x);
        for (int j = 1; j < degree; ++j)
            inp[i][j] = inp[i][0] * inp[i][j - 1];

        outputs[i] = Math.Atan(x);
    }

    MultipleLinearRegression regression = ols.Learn(inp, outputs);

    var r = new List<double>();
    foreach (var w in regression.Weights)
        Console.WriteLine(w);

    var model = inp.Dot(regression.Weights);
    var diff = 0.0;

    for (int i = 0; i < N; ++i)
        diff = Math.Max(Math.Abs(model[i] - outputs[i]), diff);

    Console.WriteLine("++++++++++++++++++++++");
    Console.WriteLine(diff);
    Console.WriteLine("++++++++++++++++++++++");
}
Console.ReadLine();

Answer 4

我发现这个用于 x [0,+inf]

atan(x) = 8x/(3+sqrt(25+80/3*x^2))

对于 x>2 我添加 +0.02 来修复

快速准确的atan/arctan近似算法

问题描述投票：0回答：4

4个回答

最新问题

快速准确的atan/arctan近似算法

问题描述 投票：0回答：4

4个回答

最新问题

问题描述投票：0回答：4