我使用
opencv-python
来读取图像,对于每个图像,我都有一个 3D 边界框列表,其中激光雷达坐标中包含 xyz_location、xyz_scale 和 xyz_rotation(欧拉角),并且提供的变换矩阵是从(激光雷达到相机)的 extrinsic_matrix
坐标)和 intrinsic_matrix
(从相机坐标到像素坐标)。
我需要创建一种方法来在图像顶部覆盖/绘制边界框,然后将图像添加到 open3d.visualization.Visualizer。为此,我创建了以下函数:
def __add_bbox__(self, label_dict: dict):
if 'camera_bbox' not in label_dict: return
camera_bbox_dict = label_dict['camera_bbox']
center = camera_bbox_dict['xyz_center']
w, l, h = camera_bbox_dict['wlh_extent']
rotation_matrix = camera_bbox_dict['xyz_rotation_matrix']
color = camera_bbox_dict['rgb_bbox_color']
# define 3D bounding box
x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
y_corners = [0, 0, 0, 0, -h, -h, -h, -h]
z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]
# rotate and translate 3D bounding box
corners_3d = np.dot(rotation_matrix, np.vstack([x_corners, y_corners, z_corners]))
# moving the center to object center
corners_3d[0, :] = corners_3d[0, :] + center[0]
corners_3d[1, :] = corners_3d[1, :] + center[1]
corners_3d[2, :] = corners_3d[2, :] + center[2]
# if any corner is behind camera, return
if np.any(corners_3d[2, :] < 0.1): return
# project 3D bounding box to 2D image
corners_2d = label_dict['calib']['P2'].reshape(3, 4) @ nx3_to_nx4(corners_3d.T).T
corners_2d = corners_2d.T # 3x8 -> 8x3
corners_2d = corners_2d[:, 0:2] / corners_2d[:, 2:3]
corners_2d = corners_2d[:, 0:2].astype(np.int32)
# draw 2D bounding box
img_np = np.asarray(self.img)
for k in range(0, 4):
i, j = k, (k + 1) % 4
cv2.line(img_np, (corners_2d[i, 0], corners_2d[i, 1]), (corners_2d[j, 0], corners_2d[j, 1]), color, self.cfg.visualization.camera.bbox_line_width)
i, j = k + 4, (k + 1) % 4 + 4
cv2.line(img_np, (corners_2d[i, 0], corners_2d[i, 1]), (corners_2d[j, 0], corners_2d[j, 1]), color, self.cfg.visualization.camera.bbox_line_width)
i, j = k, k + 4
cv2.line(img_np, (corners_2d[i, 0], corners_2d[i, 1]), (corners_2d[j, 0], corners_2d[j, 1]), color, self.cfg.visualization.camera.bbox_line_width)
self.img = o3d.geometry.Image(img_np)
self.__add_geometry__('image', self.img, False)
而
__add_geometry__
函数只是删除之前的 open3d.geometry.Image 并添加新的。
我有一个名为
Hanlder
的校准文件读取器功能,如下:
def Handler(label_path: str, calib_path: str):
output = []
# read calib
calib_file_name = os.path.basename(calib_path).split('.')[0]
calib_path = calib_path.replace(calib_file_name, 'front') # front camera calib
calib_exists = os.path.exists(calib_path)
if calib_exists:
with open(calib_path, 'r') as f: calib = json.load(f)
extrinsic_matrix = np.reshape(calib['extrinsic'], [4,4])
intrinsic_matrix = np.reshape(calib['intrinsic'], [3,3])
# read label file
if os.path.exists(label_path) == False: return output
with open(label_path, 'r') as f: lbls = json.load(f)
for item in lbls:
annotator = item['annotator'] if 'annotator' in item else 'Unknown'
obj_id = int(item['obj_id'])
obj_type = item['obj_type']
psr = item['psr']
psr_position_xyz = [float(psr['position']['x']), float(psr['position']['y']), float(psr['position']['z'])]
psr_rotation_xyz = [float(psr['rotation']['x']), float(psr['rotation']['y']), float(psr['rotation']['z'])]
psr_scale_xyz = [float(psr['scale']['x']), float(psr['scale']['y']), float(psr['scale']['z'])]
label = dict()
label['annotator'] = annotator
label['id'] = obj_id
label['type'] = obj_type
label['psr'] = psr
if calib_exists:
label['calib'] = calib
label['calib']['P2'] = nx3_to_nx4(intrinsic_matrix)
lidar_xyz_center = np.array(psr_position_xyz, dtype=np.float32)
lidar_wlh_extent = np.array(psr_scale_xyz, dtype=np.float32)
lidar_rotation_matrix = o3d.geometry.OrientedBoundingBox.get_rotation_matrix_from_xyz(psr_rotation_xyz)
if obj_type in colors: lidar_bbox_color = [i / 255.0 for i in colors[obj_type]]
else: lidar_bbox_color = [0, 0, 0]
label['lidar_bbox'] = {'xyz_center': lidar_xyz_center, 'wlh_extent': lidar_wlh_extent, 'xyz_rotation_matrix': lidar_rotation_matrix, 'rgb_bbox_color': lidar_bbox_color}
if calib_exists:
R_x = np.array([
[1, 0, 0],
[0, math.cos(psr_rotation_xyz[0]), -math.sin(psr_rotation_xyz[0])],
[0, math.sin(psr_rotation_xyz[0]), math.cos(psr_rotation_xyz[0])]
])
#Calculate rotation about y axis
R_y = np.array([
[math.cos(psr_rotation_xyz[1]), 0, math.sin(psr_rotation_xyz[1])],
[0, 1, 0],
[-math.sin(psr_rotation_xyz[1]), 0, math.cos(psr_rotation_xyz[1])]
])
#Calculate rotation about z axis
R_z = np.array([
[math.cos(psr_rotation_xyz[2]), -math.sin(psr_rotation_xyz[2]), 0],
[math.sin(psr_rotation_xyz[2]), math.cos(psr_rotation_xyz[2]), 0],
[0, 0, 1]])
camera_rotation_matrix = np.matmul(R_x, np.matmul(R_y, R_z))
camera_translation_matrix = lidar_xyz_center.reshape([-1,1])
rotation_and_translation_matrix = np.concatenate([camera_rotation_matrix, camera_translation_matrix], axis=-1)
rotation_and_translation_matrix = np.concatenate([rotation_and_translation_matrix, np.array([0,0,0,1]).reshape([1,-1])], axis=0)
origin_point = np.array([0, 0, 0, 1])
camera_xyz_center = np.matmul(extrinsic_matrix, np.matmul(rotation_and_translation_matrix, origin_point))
camera_xyz_center = camera_xyz_center[0:3]
if obj_type in colors: camera_bbox_color = colors[obj_type]
else: camera_bbox_color = [0, 0, 0]
label['camera_bbox'] = {'xyz_center': camera_xyz_center, 'wlh_extent': lidar_wlh_extent, 'xyz_rotation_matrix': lidar_rotation_matrix, 'rgb_bbox_color': camera_bbox_color}
output.append(label)
return output
Hanlder
创建一个 label_dict
列表,对于每个 label_dict
,我调用 __add_bbox__
函数。
此设置绘制了边界框,但它们似乎关闭,示例图像如下所示:
,我确信转换矩阵是正确的(它们相同的标签和校准文件在官方 github 实现中工作https://github.com/naurril/SUSTechPOINTS/blob/dev-auto-annotate/tools/visualize-camera .py.
看第一张图片,似乎事情可能没有正确旋转。也许确认激光雷达上的 xyz 是 x 进入页面、y 到侧面、z 向上/向下...凸轮使用 x 侧面、y 向上/向下、z 进入页面。 我混乱的大脑说: np.vstack([x_corners, y_corners, z_corners] 是你的罪魁祸首, 也许尝试在矩阵中使用 zxy 来排列坐标: np.vstack([z_corners, x_corners, y_corners]