手动双三次插值算法的问题（C++）

Question

我正在研究一种算法，用 4x4 的输入图像创建 1920x1080 的 BMP 输出图像。

我的代码可能有一些问题，我得到的图像有较暗和较亮的区域，此外，图像没有居中，我想知道我必须在我的代码中更改什么来纠正插值方法，我在我的代码中添加了注释解释算法。

完整代码如下：

#include <iostream>
#include <string>
#include <fstream>

//This string store the 16 pixels (3 pixels per color, 48 bytes values (code in int main(){}))
std::string rgb_inp;

// Image original size
unsigned short wide_in = 4;
unsigned short height_in = 4;

// Output image size
unsigned short wide_end = 1920;
unsigned short height_end = 1080;

// This string stores all the image bytes during the interpolation (1920*1080*3 bytes)
std::string rgb_out((wide_end * 3)* height_end, '\0');

// Header of the BMP file (necessary to make the BMP file to be readable)
unsigned char enc_bmp[54] = {0x42, 0x4D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x80, 0x07, 0x00, 0x00, 0x38, 0x04, 0x00, 0x00, 0x01, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};

// Define limits for RGB values (0 to 255) and the for the 16 neighboor pixels in the 4x4 original image grid (coordinates)
float def_lims(float val_var, float val_max)
{
    if (val_var < 0)
    {
        val_var = 0;
    }
    else if (val_var > val_max)
    {
        val_var = val_max;
    }

    return val_var;
};

// Interpolation formulas
float bicub_hermite(float f_p0, float f_p1, float f_p2, float f_p3, float xy_frac)
{
    float coef_a = -(0.5f * f_p0) + (1.5f * f_p1) - (1.5f * f_p2) + (0.5f * f_p3);
    float coef_b = (f_p0)-(2.5f * f_p1) + (2.0f * f_p2) - (0.5f * f_p3);
    float coef_c = -(0.5f * f_p0) + (0.5f * f_p2);
    float coef_d = f_p1;

    return (coef_a * (xy_frac * xy_frac * xy_frac)) + (coef_b * (xy_frac * xy_frac)) + (coef_c * (xy_frac)) + (coef_d);
};

// Get the memory direction for the pixel in the original image with the normalized coordinates
unsigned char* pixel_lims(unsigned short x_ent2, unsigned short y_ent2)
{
    // Define limit values for the 16 neighboor pixels in the 4x4 original image grid
    x_ent2 = def_lims(x_ent2, wide_in - 1);
    y_ent2 = def_lims(y_ent2, height_in - 1);

    // Return memory direction (based on the integer value of the normalized coordinate) in it's first RGB value in the input image
    return reinterpret_cast<unsigned char *>(&rgb_inp[(y_ent2 * (wide_in * 3)) + (x_ent2 * 3)]);
};

// Normalize coordinates (proportionally) and store the 3 RGB values in a string
std::string pixel_interp(float& cord_u2, float& cord_v2)
{
    // Normalize coordinates and make x_frac and y_frac the fractional part of the normalized coordinate (xy_frac in bicub_hermite)
    float cord_x = (cord_u2 * wide_in);
    unsigned short x_ent = cord_x;
    // Fractional part of the coordinate
    float x_frac = cord_x - short(cord_x);

    float cord_y = (cord_v2 * height_in);
    unsigned short y_ent = cord_y;
    float y_frac = cord_y - short(cord_y);

    // 16 neighboor pixels (4 rows in total of 4 pixels every row), first row
    unsigned char* p00 = pixel_lims(x_ent - 1, y_ent - 1);
    unsigned char* p10 = pixel_lims(x_ent + 0, y_ent - 1);
    unsigned char* p20 = pixel_lims(x_ent + 1, y_ent - 1);
    unsigned char* p30 = pixel_lims(x_ent + 2, y_ent - 1);

    // Second row
    unsigned char* p01 = pixel_lims(x_ent - 1, y_ent + 0);
    unsigned char* p11 = pixel_lims(x_ent + 0, y_ent + 0); // Origin coordinate
    unsigned char* p21 = pixel_lims(x_ent + 1, y_ent + 0);
    unsigned char* p31 = pixel_lims(x_ent + 2, y_ent + 0);

    unsigned char* p02 = pixel_lims(x_ent - 1, y_ent + 1);
    unsigned char* p12 = pixel_lims(x_ent + 0, y_ent + 1);
    unsigned char* p22 = pixel_lims(x_ent + 1, y_ent + 1);
    unsigned char* p32 = pixel_lims(x_ent + 2, y_ent + 1);

    unsigned char* p03 = pixel_lims(x_ent - 1, y_ent + 2);
    unsigned char* p13 = pixel_lims(x_ent + 0, y_ent + 2);
    unsigned char* p23 = pixel_lims(x_ent + 1, y_ent + 2);
    unsigned char* p33 = pixel_lims(x_ent + 2, y_ent + 2);

    // Store the interpolated pixel (3 interpolation process, one per RGB layer)
    std::string interp_2d_pixel(3, '\0');
    for (unsigned int i = 0; i < 3; ++i)
    {
        // Interpolate vertically
        float fila_0 = bicub_hermite(p00[i], p10[i], p20[i], p30[i], x_frac);
        float fila_1 = bicub_hermite(p01[i], p11[i], p21[i], p31[i], x_frac);
        float fila_2 = bicub_hermite(p02[i], p12[i], p22[i], p32[i], x_frac);
        float fila_3 = bicub_hermite(p03[i], p13[i], p23[i], p33[i], x_frac);

        // Interpolate horizontally
        float pixel_val = bicub_hermite(fila_0, fila_1, fila_2, fila_3, y_frac);

        // Define maximum value for in the RGB scale
        pixel_val = def_lims(pixel_val, 255.0f);

        // Store the value in the 3 layer RGB pixel
        interp_2d_pixel[i] = static_cast<unsigned char>(pixel_val);
    }

    // Return the string of 3 RGB values
    return interp_2d_pixel;
};

// Iterate and store values for the output rgb string (output image of 1920 rows x 1080 columns)
void redim_img()
{
    // Iterator for the Y axis
    char* it_rgb_out_y = &rgb_out[0];

    for (unsigned short filas_y = 0; filas_y < height_end; ++filas_y)
    {
        // Iterator for the X axis
        char* it_rgb_out_x = it_rgb_out_y;

        // cord_v is the regular fraction value in the Y axis (1/1920 * iteration number)
        float cord_v = float(filas_y) / float(height_end - 1);

        for (unsigned short columnas_x = 0; columnas_x < wide_end; ++columnas_x)
        {
            // cord_u is the regular fraction value in the X axis (1/1080 * iteration number)
            float cord_u = float(columnas_x) / float(wide_end - 1);
            std::string pixel_rgb_out(3, '\0');

            // Call to function pixel_interp
            pixel_rgb_out = pixel_interp(cord_u, cord_v);

            // Store the 3 RGB values
            it_rgb_out_x[0] = pixel_rgb_out[0];
            it_rgb_out_x[1] = pixel_rgb_out[1];
            it_rgb_out_x[2] = pixel_rgb_out[2];

            // Move the iterator 3 positions (1 pixel)
            it_rgb_out_x += 3;
        }

        // Move the iterator 1920*3 positions (a row)
        it_rgb_out_y += wide_end * 3;
    }
};

int main()
{
    // Insert the 16 pixels
    rgb_inp.push_back(0x00);
    rgb_inp.push_back(0x00);
    rgb_inp.push_back(0x00);
    rgb_inp.push_back(0xA0);
    rgb_inp.push_back(0xA0);
    rgb_inp.push_back(0xA0);
    rgb_inp.push_back(0x50);
    rgb_inp.push_back(0x50);
    rgb_inp.push_back(0x50);
    rgb_inp.push_back(0xF0);
    rgb_inp.push_back(0xF0);
    rgb_inp.push_back(0xF0);
    rgb_inp.push_back(0x00);
    rgb_inp.push_back(0x00);
    rgb_inp.push_back(0x00);
    rgb_inp.push_back(0x50);
    rgb_inp.push_back(0x50);
    rgb_inp.push_back(0x50);
    rgb_inp.push_back(0xF0);
    rgb_inp.push_back(0xF0);
    rgb_inp.push_back(0xF0);
    rgb_inp.push_back(0x00);
    rgb_inp.push_back(0x00);
    rgb_inp.push_back(0x00);
    rgb_inp.push_back(0xA0);
    rgb_inp.push_back(0xA0);
    rgb_inp.push_back(0xA0);
    rgb_inp.push_back(0xA0);
    rgb_inp.push_back(0xA0);
    rgb_inp.push_back(0xA0);
    rgb_inp.push_back(0xF0);
    rgb_inp.push_back(0xF0);
    rgb_inp.push_back(0xF0);
    rgb_inp.push_back(0x50);
    rgb_inp.push_back(0x50);
    rgb_inp.push_back(0x50);
    rgb_inp.push_back(0x00);
    rgb_inp.push_back(0x00);
    rgb_inp.push_back(0x00);
    rgb_inp.push_back(0xA0);
    rgb_inp.push_back(0xA0);
    rgb_inp.push_back(0xA0);
    rgb_inp.push_back(0x50);
    rgb_inp.push_back(0x50);
    rgb_inp.push_back(0x50);
    rgb_inp.push_back(0xF0);
    rgb_inp.push_back(0xF0);
    rgb_inp.push_back(0xF0);

    // Store the BMP header to the output image
    std::string bmp_out;

    for (unsigned char ch_ch : enc_bmp)
    {
        bmp_out.push_back(ch_ch);
    }

    // Start of the algorithm
    redim_img();

    // Store all the RGB values (pixels) from rgb_out string in the output image
    for (unsigned char ch_ch : rgb_out)
    {
        bmp_out.push_back(ch_ch);
    }

    // Save the image
    std::ofstream bmp_arch("C:/Users/-/Desktop/x_0.bmp", std::ios::binary);
    bmp_arch.write(bmp_out.data(), bmp_out.size());
    bmp_arch.close();
    std::cout << "Finished";

    return 0;
}

我需要正确的 Hermite 双三次插值算法来调整图像大小。

Answer 1

感谢您发布易于编译和运行的示例。你的代码非常接近。

由于插值不是对称的，即它在每个维度上采样 -1,0,+1,+2 像素，您需要将源图像的采样偏移半个像素以保持图像居中。这是一个非常简单的改变。

    float cord_x = (cord_u2 * wide_in) - 0.5; // subtract 1/2 pixel here
    short x_ent = cord_x; // change this to signed
    // Fractional part of the coordinate
    float x_frac = cord_x - short(cord_x);

    float cord_y = (cord_v2 * height_in) - 0.5; // subtract 1/2 pixel here
    short y_ent = cord_y; // change this to signed
    float y_frac = cord_y - short(cord_y);

并且，

pixel_lims

中的类型更改以适应可能的负数：

unsigned char* pixel_lims(short x_ent2, short y_ent2)

我现在得到这张符合您预期结果的图像

手动双三次插值算法的问题（C++）

问题描述投票：0回答：1

1个回答

最新问题

手动双三次插值算法的问题（C++）

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1