How to invert bounding boxes transformed with Albumentations

I am applying the following Albumentations transform to the input images for my YOLOv3 model:

# Define the transformations that were applied
test_transforms = A.Compose(
    [
        A.LongestMaxSize(max_size=IMAGE_SIZE),
        A.PadIfNeeded(
            min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT
        ),
        A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
    ],
    bbox_params=A.BboxParams(format="yolo", min_visibility=0.4, label_fields=[]),
)

I need to undo the resizing and padding on both the image and the bounding boxes to recover the original image and boxes, but I cannot find an inverse operation in Albumentations.
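
Since LongestMaxSize and PadIfNeeded are purely geometric, one option is to write the forward mapping down and invert it in closed form. A minimal sketch (invert_letterbox_yolo is a hypothetical helper name, and it assumes PadIfNeeded's default centered padding):

def invert_letterbox_yolo(bbox, orig_w, orig_h, image_size=608):
    # Map a YOLO box from the padded image_size x image_size frame back to
    # the original orig_w x orig_h frame. Assumes LongestMaxSize followed by
    # centered PadIfNeeded (the default position).
    scale = image_size / max(orig_w, orig_h)
    resized_w = round(orig_w * scale)
    resized_h = round(orig_h * scale)
    pad_left = (image_size - resized_w) // 2  # half of the padding per side
    pad_top = (image_size - resized_h) // 2

    x, y, w, h = bbox[:4]
    # Absolute center in the padded frame, with the padding offset removed
    x_abs = x * image_size - pad_left
    y_abs = y * image_size - pad_top
    # Renormalize against the unpadded, resized content
    return (x_abs / resized_w, y_abs / resized_h,
            w * image_size / resized_w, h * image_size / resized_h) + tuple(bbox[4:])

Because the resized content keeps the original aspect ratio, the normalized coordinates it returns are valid for the original 1280x720 image directly, with no further resize needed.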

I tried the following code to reverse the transformation, but some of the recovered values are off by 1 or 2:

import os.path

import numpy as np
import albumentations as A
import cv2
import torch
from albumentations.pytorch import ToTensorV2
from PIL import Image, ImageFile

import config


def yolo_to_xml_bbox_xyxy(bbox, w, h):
    # bbox: normalized (x_center, y_center, width, height, class)
    w_half_len = (bbox[2] * w) / 2
    h_half_len = (bbox[3] * h) / 2
    xmin = int((bbox[0] * w) - w_half_len)
    ymin = int((bbox[1] * h) - h_half_len)
    xmax = int((bbox[0] * w) + w_half_len)
    ymax = int((bbox[1] * h) + h_half_len)
    return (xmin, ymin, xmax, ymax, bbox[4])

def xml_bbox_xyxy_to_yolo_bbox(bbox, w, h):
    # bbox: absolute (xmin, ymin, xmax, ymax, class) -> normalized YOLO
    xmin, ymin, xmax, ymax, c = bbox
    x = (xmin + xmax) / 2.0 / w
    y = (ymin + ymax) / 2.0 / h
    w = (xmax - xmin) / float(w)
    h = (ymax - ymin) / float(h)
    return (x, y, w, h, c)
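
# Note: int() truncates toward zero, so a single yolo -> xyxy -> yolo round
# trip through the two helpers above is already lossy and can account for the
# 1-2 pixel drift on its own. For example, with a 1280x720 image:
#   yolo_to_xml_bbox_xyxy((0.5, 0.5, 0.33, 0.33, 0), 1280, 720)
#   -> (428, 241, 851, 478, 0)
#   xml_bbox_xyxy_to_yolo_bbox of that gives w = 423/1280 = 0.330469, not 0.33.
# Keeping floats until the final step (or using round()) avoids most of it.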

def yolo_to_xml_bbox_xyxy_np(bbox, w, h):
    # Same as yolo_to_xml_bbox_xyxy, but returns a list instead of a tuple
    w_half_len = (bbox[2] * w) / 2
    h_half_len = (bbox[3] * h) / 2
    xmin = int((bbox[0] * w) - w_half_len)
    ymin = int((bbox[1] * h) - h_half_len)
    xmax = int((bbox[0] * w) + w_half_len)
    ymax = int((bbox[1] * h) + h_half_len)
    return [xmin, ymin, xmax, ymax, bbox[4]]

# Original image dimensions and the network input size
original_width = 1280
original_height = 720
IMAGE_SIZE = 608
# Check that the inverse transformation is correct for all label annotations
labels_path = config.DATASET + "/labels"
labels_fns = os.listdir(labels_path)
# Label paths without their file extension
labels_pathswe = [os.path.join(labels_path, os.path.splitext(label_fn)[0]) for label_fn in labels_fns]

# Define the transformations that were applied
test_transforms = A.Compose(
    [
        A.LongestMaxSize(max_size=IMAGE_SIZE),
        A.PadIfNeeded(
            min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT
        ),
        A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
        # ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format="yolo", min_visibility=0.4, label_fields=[]),
)
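
# For a 1280x720 input, LongestMaxSize scales by 608/1280 = 0.475 to 608x342,
# and PadIfNeeded adds (608 - 342) // 2 = 133 rows on top and bottom
# (assuming its default centered padding and default border value of 0).
# Normalize with mean=0, std=1, max_pixel_value=255 just divides by 255, so
# padded rows still sum to zero - the zero-row scan further down relies on that.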

for label_pathwe in labels_pathswe:
    print("Label: ", label_pathwe)
    annotations = np.loadtxt(fname=label_pathwe+".txt", delimiter=" ", ndmin=2)
    annotation_base = annotations[:, 0:5]
    contacts = annotations[:, 5:6]
    annotation_unit_vec_xy = annotations[:, 6:8]
    annotation_unit_vec_mag = annotations[:, 8:9]
    bboxes_base = np.roll(annotation_base, 4, axis=1)  # [class, x, y, w, h] -> [x, y, w, h, class]
    img_path = label_pathwe.replace("labels", "images") + ".jpg"

    org_image = np.array(Image.open(img_path).convert("RGB"))

    org_xyxy_boxes = []
    for bbox in bboxes_base.tolist():
        org_xyxy_boxes.append(yolo_to_xml_bbox_xyxy(bbox, original_width, original_height))


    augmentations = test_transforms(image=org_image, bboxes=bboxes_base)
    tf_image = augmentations["image"]
    transformed_bbox = augmentations["bboxes"]

    # Inverse transformation
    # NOTE: A.Normalize is not its own inverse; with mean=0, std=1 and
    # max_pixel_value=255 it divides the already-normalized image by 255
    # again. Only the Resize contributes to the geometric inversion here.
    inverted_transform = A.Compose(
        [
            A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
            A.Resize(height=original_height, width=original_width, interpolation=cv2.INTER_LINEAR),
        ],
        bbox_params=A.BboxParams(format="yolo", min_visibility=0.4, label_fields=[]),
    )
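
    # A sketch of actually undoing the normalization (assuming mean=0, std=1,
    # max_pixel_value=255, so the forward pass only divided by 255):
    #   restored = (tf_image * 255.0).clip(0, 255).astype(np.uint8)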

    # Find the non-padded region of the image (rows/cols that are not all zero)
    non_padded_rows = np.where(tf_image[:, :, 0].sum(axis=1) > 0)[0]
    non_padded_cols = np.where(tf_image[:, :, 0].sum(axis=0) > 0)[0]
    top_row, bottom_row = non_padded_rows[0], non_padded_rows[-1]
    left_col, right_col = non_padded_cols[0], non_padded_cols[-1]

    # Crop image; bottom_row and right_col are inclusive indices, so the
    # slice needs +1, otherwise the crop is one pixel short on each axis
    cropped_image = tf_image[top_row:bottom_row + 1, left_col:right_col + 1, :]
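
    # The zero-row scan fails if genuine image content at the border is pure
    # black. The padding can instead be derived from the known sizes
    # (assuming centered PadIfNeeded):
    #   scale = IMAGE_SIZE / max(original_width, original_height)   # 0.475
    #   content_h = round(original_height * scale)                  # 342
    #   pad_top = (IMAGE_SIZE - content_h) // 2                     # 133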

    new_bboxes = []
    for bbox in transformed_bbox:
        # Absolute corner coordinates in the padded 608x608 frame
        xyxy_bbox = yolo_to_xml_bbox_xyxy(bbox, tf_image.shape[1], tf_image.shape[0])
        xmin, ymin, xmax, ymax, c = xyxy_bbox

        # Shift the corners into the coordinate frame of the cropped image
        new_x1 = xmin - left_col
        new_y1 = ymin - top_row
        new_x2 = xmax - left_col
        new_y2 = ymax - top_row
        yolo_inv_bbox = xml_bbox_xyxy_to_yolo_bbox(
            [new_x1, new_y1, new_x2, new_y2, c],
            cropped_image.shape[1], cropped_image.shape[0],
        )
        new_bboxes.append(yolo_inv_bbox)

    # Resize the cropped (unpadded) image back to the original resolution.
    # Passing tf_image here would stretch the padding back into the frame,
    # so the crop is what must be resized.
    augmentations_inv = inverted_transform(image=cropped_image, bboxes=new_bboxes)
    invtf_image = augmentations_inv["image"]
    invtransformed_bboxes = augmentations_inv["bboxes"]
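
    # A.Resize leaves normalized YOLO coordinates unchanged (they are relative
    # to the image size), so this call only rescales the image; the boxes come
    # back numerically identical to new_bboxes up to float round-off.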

    inv_org_xyxy_boxes = []
    for bbox in invtransformed_bboxes:
        inv_org_xyxy_boxes.append(yolo_to_xml_bbox_xyxy(bbox, original_width, original_height))

    if len(org_xyxy_boxes) != len(inv_org_xyxy_boxes):
        print("Original and inverted annotations not matching!")
        continue

    they_are_same = True
    for bbidx, org_bbox in enumerate(org_xyxy_boxes):
        # Compare each original box with its inverted counterpart; the
        # original set(org_xyxy_boxes) comparison matched the set of all
        # boxes against one box's coordinates and could never succeed
        if org_bbox != inv_org_xyxy_boxes[bbidx]:
            they_are_same = False
            break
    if not they_are_same:
        print("Original and inverted annotations not matching!")
    else:
        print("Ok!")
Tags: python, yolo, albumentations