为什么fprem1指令没有产生预期的结果?

问题描述 投票:0回答:1

《汇编语言的艺术》一书中说:

fprem1 指令计算部分余数。 fprem1 计算 st0/st1 的部分余数。如果 ST0 和 ST1 的指数之差小于 64,fprem1 可以计算出精确的 一次操作中余数。

ST0和ST1是FPU数据寄存器。所以我想测试一下它的性能。 我的代码是这样的:

program  exercise;

#include( "stdlib.hhf" );
static
          x: real80:=156.5e18;//first number
          y: real80:=14.36;  //second number
          z:real80;  // partial remainder
begin exercise;
     fld(y);    //y goes into st0
     fld(x);    //now x goes into st0 but previous content of st0 
                //(that is st1) will move into st1, so x is into st0 and y is into st1
     fprem1();  //calculates the remainder of st0/st1 (here x/y )but 
                // for getting the remainder, the quotient will round to the nearest value and then
                //  remainder = st0 - rounded quotient*st1==>>that is
                // equivalent by remainder= x-rounded quotient*y 
                //  ===>> then the remainder  can be positive or negative.
     fstp(z);   //this will store the partial remainder in the z real80 variable.
     stdout.put("z:  ",z); 
end  exercise;

但是当我运行这个程序时,意识到对于较小的功率差异,结果是可以预期的,但是对于较大的差异,结果远远达不到预期。

 I have summarized my result below:
      dividing               expected remainder     program remainder
     *****************************************************************      
1)    156.5/14.36                  -1.46          -1.4599..9e000
2)    156.5e1/14.36                -.24           -2.39e-1
3)    156.5e2/14.36                -2.4           -2.39
4)    156.5e3/14.36                 4.72           4.7200...619
5)    156.5e4/14.36                 4.12           4.1200...6195
6)    156.5e5/14.36                -1.88          -1.8799...805
7)    156.5e6/14.36                -4.44          -4.4399..3805
8)    156.5e7/14.36                -1.32          -1.3199...5017
9)    156.5e8/14.36                 1.16           1.16006..826
10)   156.5e9/14.36                -2.76          -2.7599..737
11)   156.5e10/14.36                1.12           1.1200619..627
12)   156.5e11/14.36               -3.16          -3.159380..739
13)   156.5e12/14.36               -2.88          -2.873805..346
14)   156.5e13/14.36               -0.08          -7.99..e-2  ~= -0.079
15)   156.5e14/14.36               -0.8           -7.99..e-1  ~= -0.79
16)   156.5e15/14.36                6.36           6.36340...924
17)   156.5e16/14.36                6.16           6.1940300..120
18)   156.5e17/14.36                4.16           4.500300559
        /***** unacceptable results (from program) at bellow *****/
19)   156.5e18/14.36       (expected) 1.48          5.13274..e10                      
20)   156.5e19/14.36       (expected) 0.44          5.13274..e10 
 
     ****************************************************************   
The Results 19) and 20) is unacceptable, because the remainder can not 
  be greater than divisor, which is incorrect.
>>Am I doing wrong? Or is there a problem with this instruction(fprem1)?
Can someone explain my problem? What can I do? Thanks.

  
assembly fpu x87 hla remainder
1个回答
0
投票

FPREM 不准确:

#include <iostream>
#include <random>
#include <cstdint>
#include <cmath>
#include <bit>

extern double x87Fprem( double x, double y );

using namespace std;

int main()
{
    mt19937_64 mt;
    uniform_int_distribution<uint64_t> uid( 0, -1 );
    constexpr size_t N = 10'000'000;
    double accuracySum = 0;
    for( size_t r = N; r; )
    {
        double
            a = bit_cast<double>( uid( mt ) ),
            b = bit_cast<double>( uid( mt ) );
        if( isnan( a ) || isinf( a ) || isnan( b ) )
            continue;
        double
            std = fmod( a, b ),
            x87 = x87Fprem( a, b );
        if( signbit( std ) == signbit( x87 ) || !x87 && !std )
            std = abs( std ),
            x87 = abs( x87 ),
            accuracySum += x87 != std ? log2( x87 > std ? x87 : std ) - log2( abs( x87 - std ) ) : 53;
        --r;
    }
    cout << accuracySum / 1.0e7 << endl;
    return 0;
}

MASM:

PUBLIC ?x87Fprem@@YANNN@Z

_TEXT SEGMENT
?x87Fprem@@YANNN@Z PROC
    movsd   qword ptr [rsp - 16], xmm0
    movsd   qword ptr [rsp - 8], xmm1
    fld     qword ptr [rsp - 8]
    fld     qword ptr [rsp - 16]
    fprem
    fstp    qword ptr [rsp -8]
    fstp    st
    movsd   xmm0, qword ptr [rsp - 8]
    ret
?x87Fprem@@YANNN@Z ENDP
_TEXT ENDS

END

上面的代码在我的 Zen4-CPU 的结果中打印 28 个共享位。

© www.soinside.com 2019 - 2024. All rights reserved.