80位长双倍的浮动距离

Question

对于 float, double和 __float128，我们有以下代码来计算它们之间的可表示数。

int32_t float_distance(float x, float y) {
    static_assert(sizeof(float) == sizeof(int32_t), "float is incorrect size.");
    int32_t xi = *reinterpret_cast<int32_t*>(&x);
    int32_t yi = *reinterpret_cast<int32_t*>(&y);
    return yi - xi;
}

int64_t float_distance(double x, double y) {
    static_assert(sizeof(double) == sizeof(int64_t), "double is incorrect size.");
    int64_t xi = *reinterpret_cast<int64_t*>(&x);
    int64_t yi = *reinterpret_cast<int64_t*>(&y);
    return yi - xi;
}


__int128_t float_distance(__float128 x, __float128 y) {
    static_assert(sizeof(__float128) == sizeof(__int128_t), "quad is incorrect size.");
    __int128_t xi = *reinterpret_cast<__int128_t*>(&x);
    __int128_t yi = *reinterpret_cast<__int128_t*>(&y);
    return yi - xi;
}

(这段代码适用于x,y > 0, 为了便于阅读，我们不处理一般情况)

这里没有 int80_t所以，什么是类似的代码 long double? 出于绝望，我尝试了两个 __int128_t 和 __int64_t 返回类型，但没有喜悦。

编辑：看来这并不是IEEE-754浮点数的知名属性。下面是对于那些想知道为什么会这样做的人来说，这是一个很好的阅读。

Answer 1

你可以使用下面提供的浮点表示法的属性来计算。numeric_limits 的模板 <numeric> 头和例程中的 <cmath>. 不需要检查代表值的位。下面是C++的示例代码。

#include <cmath>
#include <cstdint>

#include <numeric>


/*  Return the signed distance from 0 to x, measuring distance as one unit per
    number representable in FPType.  x must be a finite number.
*/
template<typename FPType> intmax_t ToOrdinal(FPType x)
{
    static const int
        Radix             = std::numeric_limits<FPType>::radix,
        SignificandDigits = std::numeric_limits<FPType>::digits,
        MinimumExponent   = std::numeric_limits<FPType>::min_exponent;

    //  Number of normal representable numbers for each exponent.
    static const intmax_t
        NumbersPerExponent = scalbn(Radix-1, SignificandDigits-1);

    static_assert(std::numeric_limits<FPType>::has_denorm == std::denorm_present,
        "This code presumes floating-point type has subnormal numbers.");

    if (x == 0)
        return 0;

    //  Separate the sign.
    int sign = std::signbit(x) ? -1 : +1;
    x = std::fabs(x);

    //  Separate the significand and exponent.
    int exponent = std::ilogb(x)+1;
    FPType fraction = std::scalbn(x, -exponent);

    if (exponent < MinimumExponent)
    {
        //  For subnormal x, adjust to its subnormal representation.
        fraction = std::scalbn(fraction, exponent - MinimumExponent);
        exponent = MinimumExponent;
    }

    /*  Start with the number of representable numbers in preceding normal
        exponent ranges.
    */
    intmax_t count = (exponent - MinimumExponent) * NumbersPerExponent;

    /*  For subnormal numbers, fraction * radix ** SignificandDigits is the
        number of representable numbers from 0 to x.  For normal numbers,
        (fraction-1) * radix ** SignificandDigits is the number of
        representable numbers from the start of x's exponent range to x, and
        1 * radix ** SignificandDigits is the number of representable subnormal
        numbers (which we have not added into count yet).  So, in either case,
        adding fraction * radix ** SignificandDigits is the desired amount to
        add to count.
    */
    count += (intmax_t) std::scalbn(fraction, SignificandDigits);

    return sign * count;
}


/*  Return the number of representable numbers from x to y, including one
    endpoint.
*/
template<typename FPType> intmax_t Distance(FPType y, FPType x)
{
    return ToOrdinal(y) - ToOrdinal(x);
}


#include <iomanip>
#include <iostream>


template<typename FPType> static void Try(FPType x)
{
    std::cout << x << " -> " << ToOrdinal(x) << ".\n";
}


int main(void)
{
    Try(0.f);
    Try(0x1p-149f);
    Try(0x1p-127f);
    Try(0x1p-126f);
    Try(0x1p-126f + 0x1p-149f);
    Try(1.f);
    Try(1.5f);
    Try(0.l);
    Try(1.l);
    Try(1.5l);

    //  Test from 3 steps below 4 to 5 steps above 4.
    float x = 4, y = x;
    for (int i = 0; i < 3; ++i)
        x = std::nexttowardf(x, 0);
    for (int i = 0; i < 5; ++i)
        y = std::nexttowardf(y, INFINITY);
    std::cout << "There are " << Distance(y, x) << std::setprecision(8)
        << " representable numbers from " << x << " to " << y << ".\n";

}

80位长双倍的浮动距离

问题描述投票：1回答：1

1个回答

最新问题

80位长双倍的浮动距离

问题描述 投票：1回答：1

1个回答

最新问题

问题描述投票：1回答：1