有什么快速准确的 atan/arctan 近似函数/算法吗?输入:x = (0, 1](最小 x 要求)。输出:以弧度为单位
double FastArcTan(double x)
{
return M_PI_4*x - x*(fabs(x) - 1)*(0.2447 + 0.0663*fabs(x));
}
我在网上找到了这个函数,但它给出的最大误差为 1.6 弧度,这太大了。
OP 报告的最大误差为 1.6 弧度(92 度),这与下面测试 OP 的代码不一致,最大误差输入
x
:0 到 1 范围约为 0.0015 弧度。我怀疑代码错误或测试超出了 0...1 的范围。 OP 是指 1.6 毫弧度吗?
也许更准确的
a*x^5 + b*x^3 + c*x
对于OP来说仍然足够快。在我的机器上,平均准确率提高了 4 倍,最坏情况下准确率提高了 2 倍。它使用 @Foon 和 @Matthew Pope 建议的最佳三项多项式
#include <math.h>
#include <stdio.h>
#ifndef M_PI_4
#define M_PI_4 (3.1415926535897932384626433832795/4.0)
#endif
double FastArcTan(double x) {
return M_PI_4*x - x*(fabs(x) - 1)*(0.2447 + 0.0663*fabs(x));
}
#define A 0.0776509570923569
#define B -0.287434475393028
#define C (M_PI_4 - A - B)
#define FMT "% 16.8f"
double Fast2ArcTan(double x) {
double xx = x * x;
return ((A*xx + B)*xx + C)*x;
}
int main() {
double mxe1 = 0, mxe2 = 0;
double err1 = 0, err2 = 0;
int n = 100;
for (int i=-n;i<=n; i++) {
double x = 1.0*i/n;
double y = atan(x);
double y_fast1 = FastArcTan(x);
double y_fast2 = Fast2ArcTan(x);
printf("%3d x:% .3f y:" FMT "y1:" FMT "y2:" FMT "\n", i, x, y, y_fast1, y_fast2);
if (fabs(y_fast1 - y) > mxe1 ) mxe1 = fabs(y_fast1 - y);
if (fabs(y_fast2 - y) > mxe2 ) mxe2 = fabs(y_fast2 - y);
err1 += (y_fast1 - y)*(y_fast1 - y);
err2 += (y_fast2 - y)*(y_fast2 - y);
}
printf("max error1: " FMT "sum sq1:" FMT "\n", mxe1, err1);
printf("max error2: " FMT "sum sq2:" FMT "\n", mxe2, err2);
}
输出
...
96 x: 0.960 y: 0.76499283y1: 0.76582280y2: 0.76438526
97 x: 0.970 y: 0.77017091y1: 0.77082844y2: 0.76967407
98 x: 0.980 y: 0.77529750y1: 0.77575981y2: 0.77493733
99 x: 0.990 y: 0.78037308y1: 0.78061652y2: 0.78017777
100 x: 1.000 y: 0.78539816y1: 0.78539816y2: 0.78539816
max error1: 0.00150847sum sq1: 0.00023062
max error2: 0.00084283sum sq2: 0.00004826
不清楚为什么 OP 的代码使用
fabs()
给出“输入:x = (0, 1]”。
我根据 Pade-Chebyshev 近似开发了以下函数来满足我的需求。该代码是在 Visual Studio (x86) 下编写的,适用于整个源数据范围。该函数的准确度仅略低于标准 atanf 函数,但速度明显快于标准 atanf 函数。
// Fast arctangent with single precission
_declspec(naked) float _vectorcall arctg(float x)
{
// When |x|<=1 following formula is used:
//
// a0 + a0*a1*x^2 + a0*a2*x^4 + a0*a3*x^6
// arctg x = x * ------------------------------------------------------- = x*P(x)/Q(x)
// a0 + a0*(a1+b0)*x^2 + a0*(a2+b1)*x^4 + a0*(a3+b2)*x^6
//
// The a0 constant is reduced, but it is needed to less the error
// and prevent overflow at large |x|.
// P(x) and Q(x) are 3th degree polinomials of x^2.
// When 1<|x|<62919776 (approx.) used formula is
//
// pi/2*|x|*x^6*Q(1/x)-x^6*P(1/x)
// arctg x = pi/2*sgn(x)-arctg(1/x) = --------------------------------
// x*x^6*Q(1/x)
//
// Here x^6*P(1/x) and x^6*Q(1/x) are 3th degree polinomials of x^2 too.
// When |x|>=62919776 used formula is arctg x = pi/2*sgn(x)
// To improve accuracy at |x|>1 the constant pi/2 is replaced by the sum (pi-3)/2 and 3/2.
//
static const float ct[14] = // Constants table
{
6.28740248E-17f, // a0*(a1+b0)=a0*c1
4.86816205E-17f, // a0*a1
2.24874633E-18f, // a0*(a3+b2)=a0*c3
4.02179944E-19f, // a0*a3
4.25772129E-17f, // a0
4.25772129E-17f, // a0
2.50182216E-17f, // a0*(a2+b1)=a0*c2
1.25756219E-17f, // a0*a2
0.0707963258f, // (pi-3)/2
1.5f, // 3/2
1.0f, // 1
-0.0707963258f, // -(pi-3)/2
-1.5f, // -3/2
3.95889818E15f // Threshold of x^2 when arctg(x)=pi/2*sgn(x)
};
_asm
{
vshufps xmm1,xmm0,xmm0,0 // xmm1 = x # x : x # x
mov edx,offset ct // edx contains the address of constants table
vmulps xmm2,xmm1,xmm1 // xmm2 = x^2 # x^2 : x^2 # x^2
vmovups xmm3,[edx+16] // xmm3 = a0*a2 # a0*c2 : a0 # a0
vmulps xmm4,xmm2,xmm2 // xmm4 = y^2 # y^2 : y^2 # y^2
vucomiss xmm2,[edx+40] // Compare y=x^2 to 1
ja arctg_big // Jump if |x|>1
vfmadd231ps xmm3,xmm2,[edx] // xmm3 ~ a3*y+a2 # c3*y+c2 : a1*y+1 # c1*y+1
vmovhlps xmm1,xmm1,xmm3 // xmm1 ~ a3*y+a2 # c3*y+c2
vfmadd231ps xmm3,xmm4,xmm1 // xmm3 ~ a3*y^3+a2*y^2+a1*y+1 # c3*y^3+c2*y^2+c1*y+1
vmovshdup xmm2,xmm3 // xmm2 = P; xmm3 = Q
vdivss xmm2,xmm2,xmm3 // xmm2 = P/Q
vmulss xmm0,xmm0,xmm2 // xmm0 = x*P/Q = arctg(x)
ret // Return
arctg_big: // When |x|>1 use formula pi/2*sgn(x)-arctg(1/x)
vfmadd213ps xmm3,xmm2,[edx] // xmm3 ~ a2*y+a3 # c2*y+c3 : y+a1 # y+c1
vmovmskpd eax,xmm1 // eax=3, if x<0, otherwise eax=0
vmovhlps xmm0,xmm0,xmm3 // xmm0 ~ a2*y+a3 # c2*y+c3
vfmadd213ps xmm3,xmm4,xmm0 // xmm3 ~ y^3+a1*y^2+a2*y+a3 # y^3+c1*y^2+c2*y+c3
vmovss xmm0,[edx+4*eax+32] // xmm0 = (pi-3)/2*sgn(x)
vucomiss xmm2,[edx+52] // Compare y=x^2 to threshold value
jnb arctg_end // The data is already in xmm0, if |x|>=62919776
vmovshdup xmm4,xmm3 // xmm4 = P; xmm3 = Q
vmulss xmm1,xmm1,xmm3 // xmm1 = x*Q
vfmsub132ss xmm0,xmm4,xmm1 // xmm0 = (pi-3)/2*|x|*Q-P
vdivss xmm0,xmm0,xmm1 // xmm0 = (pi-3)/2*sgn(x)-P/(x*Q)
arctg_end: // Add to result 3/2*sgn(x)
vaddss xmm0,xmm0,[edx+4*eax+36] // xmm0 = pi/2*sgn(x)-P/(x*Q)
ret // Return
}
}
反正切似乎可以很好地近似为
z=x/(x+pi/3)
空间中的多项式。以下系数在您的区间上产生的最大误差为 4.07e-9
。
z0 = 0.0;
z1 = 1.0471985201240248;
z2 = 1.047102380719068;
z3 = 0.6678082665364844;
z4 = -0.16312555173050677;
z5 = -0.33989938855441837;
z6 = -5.8773286456840355;
z7 = 17.33961311116544;
z8 = -49.09950369422959;
z9 = 82.10190353000648;
z10 = -60.11210171537528;
z11 = 14.183376881192657;
您还可以使用高阶多项式(第 19 次)深入了解
1.23e-14
。
这里有一些 C# 代码(参考 Accord.net),用于查看精度和系数数量之间的权衡。
using Accord.Math;
using Accord.Statistics.Models.Regression.Linear;
using System.Management;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
foreach(var degree in new[] { 3, 5, 7, 11, 15, 19, 23, 27 })
{
var N = 5000;
var ols = new OrdinaryLeastSquares() { UseIntercept = false };
var inp = new double[N][];
var outputs = new double[N];
for (int i = 0; i < N; ++i)
{
double x = (double)i / (double)N;
inp[i] = new double[degree];
inp[i][0] = x / (Math.PI /3 + x);
for (int j = 1; j < degree; ++j)
inp[i][j] = inp[i][0] * inp[i][j - 1];
outputs[i] = Math.Atan(x);
}
MultipleLinearRegression regression = ols.Learn(inp, outputs);
var r = new List<double>();
foreach (var w in regression.Weights)
Console.WriteLine(w);
var model = inp.Dot(regression.Weights);
var diff = 0.0;
for (int i = 0; i < N; ++i)
diff = Math.Max(Math.Abs(model[i] - outputs[i]), diff);
Console.WriteLine("++++++++++++++++++++++");
Console.WriteLine(diff);
Console.WriteLine("++++++++++++++++++++++");
}
Console.ReadLine();
我发现这个用于 x [0,+inf]
atan(x) = 8x/(3+sqrt(25+80/3*x^2))
对于 x>2 我添加 +0.02 来修复