Why do my 3D-to-2D image projections differ from Blender's?


In my simulation, 3D points need to be projected into a fisheye image, equivalent to Blender's PANO camera with the equidistant camera model. Where can I find scientific papers or technical reports on how Blender implements this 3D-to-2D projection?

I am looking for:

  1. The transformation from the world coordinate system to the camera coordinate system.
  2. The conversion from Cartesian camera coordinates to spherical camera coordinates, so that points are projected onto the unit sphere before being projected onto the image plane.
  3. The transformation from spherical camera coordinates to the sensor plane (see the formulas after this list).
  4. The shift that moves points from the principal point to the top-left corner of the sensor.
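
For steps 2 and 3 I am assuming the equidistant ("f-theta") model, i.e.

$$\theta = \arccos\!\left(\frac{z}{\sqrt{x^2 + y^2 + z^2}}\right), \qquad \varphi = \operatorname{atan2}(y, x)$$

$$x_{\mathrm{img}} = f_x\,\theta\cos\varphi, \qquad y_{\mathrm{img}} = f_y\,\theta\sin\varphi$$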

Although an implementation for perspective cameras is available in the bpy_extras module, it does not provide the conversion for the equidistant camera model. My results differ from Blender's. Why?

main.py:

#!/usr/bin/env python3

import numpy as np
np.set_printoptions(precision=4, suppress=True)
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
import plotting

def compute_R(alpha_deg, beta_deg, gamma_deg):
    ## Rotation Matrix R (code from CV2 exercise)

    # convert into radians...
    alpha = alpha_deg * np.pi / 180
    beta  = beta_deg  * np.pi / 180
    gamma = gamma_deg * np.pi / 180

    # prepare sine and cosine values
    ca = np.cos(alpha)
    sa = np.sin(alpha)
    cb = np.cos(beta)
    sb = np.sin(beta)
    cg = np.cos(gamma)
    sg = np.sin(gamma)
    
    # rotation about x axis
    R_x = np.array([
        [1,  0,   0],
        [0, ca, -sa],
        [0, sa,  ca],
    ], dtype=float)

    # rotation about y axis
    R_y = np.array([
        [ cb, 0, sb],
        [  0, 1,  0],
        [-sb, 0, cb]    
    ], dtype=float)

    # rotation about z axis
    R_z = np.array([
        [cg, -sg, 0],
        [sg,  cg, 0],
        [ 0,   0, 1]    
    ], dtype=float)

    # compose R_xyz
    R = R_z.dot(R_y.dot(R_x))
        
    return R
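
# Optional sanity check (assumption: SciPy is available): in SciPy, lowercase
# 'xyz' selects extrinsic rotations applied about the fixed x, then y, then z
# axes, which composes to the same R = R_z @ R_y @ R_x as above:
#
#   from scipy.spatial.transform import Rotation
#   assert np.allclose(
#       Rotation.from_euler('xyz', [0, 270, 0], degrees=True).as_matrix(),
#       compute_R(0, 270, 0))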

def cart2spherical_wiki(X_cam):
    ## from https://en.wikipedia.org/wiki/Spherical_coordinate_system
    # radial distance of each point from the projection centre
    r = np.sqrt(X_cam[0]**2 + X_cam[1]**2 + X_cam[2]**2)
    print(f'manual_{r=}')
    #r = np.linalg.norm(X_cam, axis=0)
    #print(f'np.linalg_{r=}')
    # polar angle theta, measured from the z axis
    theta = np.arccos(X_cam[2] / r)
    # azimuth phi; note that the minus sign deviates from the Wikipedia
    # convention (phi = atan2(y, x)) and mirrors the projected image
    phi = -np.arctan2(X_cam[1], X_cam[0])

    return theta, phi

def cart2spherical_blender(X_cam, fov):
    # can be ignored for the moment
    r = np.arctan2(np.sqrt(X_cam[1] * X_cam[1] + X_cam[2] * X_cam[2]), X_cam[0]) / fov
    phi = np.arctan2(X_cam[2], X_cam[1])

    return r, phi

def spherical2normimg(f_x, f_y, theta, phi):
    # from phd-thesis michel; equidistant model: the radial distance on the
    # sensor grows linearly with theta (r = f * theta)
    x_img_norm = f_x * theta * np.cos(phi)
    y_img_norm = f_y * theta * np.sin(phi)
    #print(x_img_norm)
    #print(y_img_norm)

    return x_img_norm, y_img_norm

def spherical2normimg_blender(r, phi):
    # can be ignored for the moment
    x_img_norm = r * np.cos(phi) + 0.5
    y_img_norm = r * np.sin(phi) + 0.5

    return x_img_norm, y_img_norm
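
# Note: unlike spherical2normimg() above, this Cycles-style variant does not
# use the focal length at all; r is already normalized by the fov, and the
# +0.5 shifts the result into [0, 1] image coordinates.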

def points_to_image(x, y, resolution):
    
    # sensor point (0,0) lies in the image at position (w/2, h/2),
    # hence the addition of half the image resolution
    # (x, y) - sensor points
    # (x_new, y_new) - image points
    x_new = x + resolution[0]//2
    y_new = y + resolution[1]//2

    return x_new, y_new
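
# NOTE (assumption): points_to_image() does not flip any axis. Image
# coordinates usually have y growing downwards while the sensor y axis grows
# upwards, so a missing vertical flip, e.g.
#   y_new = resolution[1]//2 - y
# is one candidate explanation for a result mirrored relative to Blender's.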

def main():
    # plot input/output?
    plot = True
        
    ## World points
    X_wrld = np.array([
                    [1.5, 5, 1], # right foot
                    [2.5, 5, 1], # left foot
                    [2, 5, 3], # hip
                    [1, 5, 5], # right hand
                    [3, 5, 5]  # left hand
                    ])

    ## Camera
    C = np.array([2, 1, 4]) # camera projection center in world coordinates

    ## Plot World with 3D Points and Camera
    fig = plt.figure()
    if plot:
        title = 'World coordinate system: 3D Points and Camera'
        plt1 = plotting.plot_world(X_wrld, C, elev=0, azim=-90, roll=0, title=title, fig=fig)
    
    # transpose X_wrld
    X_wrld = X_wrld.T # take the transpose

    # add ones for homogeneous representation
    row_ones = np.ones([1, X_wrld.shape[1]])
    X_wrld_hom = np.concatenate([X_wrld, row_ones], axis=0)
    #print(f'{X_wrld_hom=}')
    ## Use rotation matrix R and camera centre C to build the 4x4 extrinsic matrix H

    # orientation / rotation
    alpha_deg = 0 # first:  rotate about alpha degrees around the camera's x-axis
    beta_deg  = 270 # second: rotate about beta  degrees around the camera's y-axis
    gamma_deg = 0 # third:  rotate about gamma degrees around the camera's z-axis
    
    # Compute 3x3 Rotation Matrix
    R = compute_R(alpha_deg, beta_deg, gamma_deg)
    #print(f'{R=}')

    # Build H-Matrix (4x4)

    # RC is a 3-vector: R @ C
    RC = R.dot(C)
    H = np.column_stack( (R, -RC) )
    H = np.vstack( (H, [0, 0, 0, 1]) )
    #print(f'{H=}')

    # Transformation from world to camera coordinates
    X_cam = H.dot(X_wrld_hom)
    print(f'{X_cam=}')

    if plot:
        title = f'camera coordinate system: camera rotation of {alpha_deg=}, {beta_deg=}, {gamma_deg=}'
        #plt2 = plotting.plot_camera(X_cam=X_cam.T, C=[0, 0, 0], elev=180, azim=90, roll=90, title=title, fig=fig)
        plt2 = plotting.plot_camera(X_cam=X_cam.T, C=[0, 0, 0], elev=alpha_deg, azim=beta_deg, roll=gamma_deg, title=title, fig=fig)
    
    ### Intrinsics ###
    # focal length of a Canon FD 1:5.6 / 7.5 mm fisheye lens
    # (see, e.g., https://de.wikipedia.org/wiki/Fischaugenobjektiv)
    f_mm = 0.0075 # 7.5 mm, stored in metres
    # field of view
    fov = 180 * np.pi / 180
    # full-frame sensor, e.g. https://de.wikipedia.org/wiki/Sony_Alpha_1
    chip_w = 35.7 * 1e-3 # 35.7 mm
    chip_h = 23.8 * 1e-3 # 23.8 mm
    # Resolution of camera
    res_w = 8640 # px
    res_h = 5760 # px
    # pixel densities
    m_x = res_w / chip_w # px/m
    m_y = res_h / chip_h # px/m
    # focal length expressed in pixels
    f_x = f_mm * m_x # px
    f_y = f_mm * m_y # px; f_x != f_y if the sensor pixels are not square
    ### Intrinsics End ###
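
    # NOTE (assumption): the Wikipedia-based path below scales purely with
    # f_x/f_y (r = f * theta) and never uses `fov`; with f = 7.5 mm the sensor
    # half-width of 17.85 mm corresponds to theta = 17.85 / 7.5 = 2.38 rad
    # (~136 deg), whereas Blender's fisheye mapping normalizes by the fov.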

    ## From camera coordinates (Cartesian) to spherical camera coordinates (a) Wiki
    theta, phi = cart2spherical_wiki(X_cam)
    ## From spherical camera coordinates to normalized image coordinates (a) Wiki
    x_img_norm, y_img_norm = spherical2normimg(f_x, f_y, theta, phi)
    # camera to spherical from blender implementation
    #r, phi = cart2spherical_blender(X_cam, fov)
    # spherical to normalized image coordinates from blender code
    #x_img_norm, y_img_norm = spherical2normimg_blender(r, phi)

    if plot:
        title = 'Sensor coordinate system: Camera points on sensor plane'
        plt3 = plotting.plot_sensor(x_img_norm=x_img_norm, y_img_norm=y_img_norm, title=title, fig=fig)

    # From normalized image coordinates to image coordinates by shifting to left top of sensor
    x_img, y_img = points_to_image(x_img_norm, y_img_norm, (res_w, res_h))
    
    if plot:
        title = 'Image coordinate system: points on image plane shifted to left top corner'
        plotting.plot_image(x_img, y_img, (0, res_w), (0, res_h), title, fig)

    plt.show()
        
if __name__ == "__main__":
    main()

plotting.py:

#!/usr/bin/env python3

import numpy as np
np.set_printoptions(precision=4, suppress=True)
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d


def plot_world(X_wrld, C, elev, azim, roll, title, fig):
    plt1 = fig.add_subplot(2, 2, 1, projection='3d')
    colors = ['r', 'b', 'g', 'm', 'y']
    labels = ['right foot', 'left foot', 'hip', 'right hand', 'left hand']    
    for X,Y,Z,c,l in zip(X_wrld[:,0], X_wrld[:,1], X_wrld[:,2], colors, labels):
        plt1.plot3D(X, Y, Z, c=c, marker=".", linestyle='None', label=l)
    
    plt1.plot3D(C[0], C[1], C[2], c="red", marker="v", linestyle='None', label="camera")
    plt1.set(xlabel='X', ylabel='Y', zlabel='Z')
    plt1.set_xlim(0, 4)
    plt1.set_ylim(0, 6)
    plt1.set_zlim(0, 6)
    plt1.legend()
    plt1.set_title(title)
    plt1.view_init(elev=elev, azim=azim, roll=roll, vertical_axis='z')

    return plt1

def plot_camera(X_cam, C, elev, azim, roll, title, fig):
    plt2 = fig.add_subplot(2, 2, 2, projection='3d')
    colors = ['r', 'b', 'g', 'm', 'y']
    labels = ['right foot', 'left foot', 'hip', 'right hand', 'left hand']      
    for X,Y,Z,c,l in zip(X_cam[:,0], X_cam[:,1], X_cam[:,2], colors, labels):
        plt2.plot3D(X, Y, Z, c=c, marker=".", linestyle='None', label=l)
    
    plt2.plot3D(C[0], C[1], C[2], c="red", marker="v", linestyle='None', label="camera")
    plt2.set(xlabel='X', ylabel='Y', zlabel='Z')
    plt2.legend()
    plt2.set_title(title)
    plt2.view_init(elev=elev, azim=azim, roll=roll, vertical_axis='z')

    return plt2

def plot_sensor(x_img_norm, y_img_norm, title, fig):
    plt3 = fig.add_subplot(2, 2, 3)
    colors = ['r', 'b', 'g', 'm', 'y']
    labels = ['right foot', 'left foot', 'hip', 'right hand', 'left hand']
    for x, y, c, l in zip(x_img_norm, y_img_norm, colors, labels):
        plt3.plot(x, y, c=c, marker=".", linestyle='None', label=l)
    
    plt3.set_title(title)
    return plt3

def plot_image(x_img_norm, y_img_norm, limit_x, limit_y, title, fig):
    plt4 = fig.add_subplot(2, 2, 4)
    colors = ['r', 'b', 'g', 'm', 'y']
    labels = ['right foot', 'left foot', 'hip', 'right hand', 'left hand']
    for x, y, c, l in zip(x_img_norm, y_img_norm, colors, labels):
        plt4.plot(x, y, marker='.', label= l, linestyle='None', c=c)

    plt4.set(xlim=limit_x, ylim=limit_y)
    plt4.set_title(title)
    plt4.legend()

    return plt4

In the .blend file I place the same five points and render them with an equidistant panoramic camera. I have tried different combinations of the rotations (R_x, R_y, R_z). Compared with Blender's output, the difference is a reflection about the vertical axis.

python numpy matplotlib blender coordinate-transformation
1 Answer

Mapping from world coordinates to camera coordinates:

camera_obj = bpy.context.scene.camera
world_coords = ...
camera_coords = camera_obj.matrix_world.inverted() @ world_coords
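
For example, a minimal sketch of this step (an assumption on my part: it runs inside Blender's Python console, and the point is the "right foot" from the question):

import bpy
from mathutils import Vector

camera_obj = bpy.context.scene.camera
world_coords = Vector((1.5, 5.0, 1.0))  # "right foot" world point
# a 4x4 Matrix multiplied with a 3D Vector implicitly appends a homogeneous 1,
# so rotation and translation are applied in one step
camera_coords = camera_obj.matrix_world.inverted() @ world_coords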

Inspired by the Cycles source code (fisheye_to_direction() and direction_to_fisheye() in https://github.com/blender/blender/blob/main/intern/cycles/kernel/camera/projection.h), we can map from camera space to the final image coordinates:

import math

camera = camera_obj.data
...
if camera.type == 'PANO':
    v = camera_coords
    a = v.x / v.z
    b = v.y / v.z

    r = math.atan(math.sqrt(a**2 + b**2)) / (0.5 * camera.cycles.fisheye_fov)
    phi = math.atan2(v.y, v.x)
    x = r * math.cos(phi)
    y = r * math.sin(phi)

    # x and y are OpenGL style normalized device coordinates
    # (-1,-1 is bottom left, 1,1 is top right)
    pixel_x = (0.5*x + 0.5) * scene.render.resolution_x
    pixel_y = (1 - (0.5*y + 0.5)) * scene.render.resolution_y
    ...
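
Putting both steps together outside Blender, a minimal self-contained sketch (my own consolidation, not Blender code; world_to_camera stands in for matrix_world.inverted(), and the fov and resolution values are taken from the question):

import numpy as np

def equidistant_world_to_pixel(X_wrld, world_to_camera, fov, res_x, res_y):
    # world -> camera space via a homogeneous 4x4 multiply
    v = world_to_camera @ np.append(X_wrld, 1.0)

    # angle from the optical axis; Blender cameras look along their local -Z,
    # hence -v[2] (equivalent to the a, b formulation above for points in
    # front of the camera)
    theta = np.arctan2(np.hypot(v[0], v[1]), -v[2])
    phi = np.arctan2(v[1], v[0])

    # normalized device coordinates in [-1, 1]; r = 1 at theta = fov/2
    r = theta / (0.5 * fov)
    x = r * np.cos(phi)
    y = r * np.sin(phi)

    # NDC -> pixels, flipping y so that the origin is the top-left corner
    pixel_x = (0.5 * x + 0.5) * res_x
    pixel_y = (1 - (0.5 * y + 0.5)) * res_y
    return pixel_x, pixel_y

# usage with the "right foot" point, a 180 deg fov and a 4x4
# world-to-camera matrix H as built in main.py:
#   px, py = equidistant_world_to_pixel(np.array([1.5, 5, 1]), H, np.pi, 8640, 5760)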