Error possibly caused by the shape of the data

Problem description (votes: 0, answers: 1)

I am writing object detection code in Python with Keras. I have a set of 175 images with shape (175, 480, 720, 3), a set of labels with shape (175, 4) that corresponds to the images (one label per object in an image), and a matching set of bounding boxes with shape (175, 4, 4).
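For reference, the shapes described above would correspond to arrays laid out roughly like this (illustrative placeholders with assumed dtypes, not the real data):

import numpy as np

trainImages = np.zeros((175, 480, 720, 3), dtype=np.uint8)   # 175 RGB images, 480 x 720 pixels
trainLabels = np.zeros((175, 4), dtype=np.int32)             # up to 4 class labels per image
trainBboxes = np.zeros((175, 4, 4), dtype=np.float32)        # up to 4 boxes of 4 coordinates per image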

model.fit(trainImages, {"classification": trainLabels, "regression": trainBboxes},
            validation_data=(validImages, {"classification": validLabels, "regression": validBboxes}),
            batch_size=8, epochs=10)

When I feed them into my neural network, it raises this error:

ValueError: Data cardinality is ambiguous. Make sure all arrays contain the same number of samples. 'x' sizes: 480, 480, ..., 480, 480 (175 in total); 'y' sizes: 4, 4, ..., 4, 4 (175 in total)

I tried converting the NumPy arrays to lists, but nothing changed. The full code is below:

import os
import numpy as np
import xml.etree.ElementTree as ET
import cv2
import tensorflow as tf
from keras import layers
import keras

print("模块导入完成")

def get_annotations(cname2cid, datadir):
    filenames = os.listdir(os.path.join(datadir, 'annotations', 'xml'))
    records = []
    ct = 0
    for fname in filenames:
        fid = fname.split('.')[0]
        fpath = os.path.join(datadir, 'annotations', 'xml', fname)
        img_file = os.path.join(datadir, 'images', fid + '.png')
        tree = ET.parse(fpath)

        if tree.find('id') is None:
            im_id = np.array([ct])
        else:
            im_id = np.array([int(tree.find('id').text)])

        objs = tree.findall('object')
        im_w = float(tree.find('size').find('width').text)
        im_h = float(tree.find('size').find('height').text)
        gt_bbox = np.zeros((len(objs), 4), dtype=np.float32)
        gt_class = np.zeros((len(objs), ), dtype=np.int32)
        is_crowd = np.zeros((len(objs), ), dtype=np.int32)
        difficult = np.zeros((len(objs), ), dtype=np.int32)
        for i, obj in enumerate(objs):
            cname = obj.find('name').text
            gt_class[i] = cname2cid[cname]
            _difficult = int(obj.find('difficult').text)
            x1 = float(obj.find('bndbox').find('xmin').text)
            y1 = float(obj.find('bndbox').find('ymin').text)
            x2 = float(obj.find('bndbox').find('xmax').text)
            y2 = float(obj.find('bndbox').find('ymax').text)
            x1 = max(0, x1)
            y1 = max(0, y1)
            x2 = min(im_w - 1, x2)
            y2 = min(im_h - 1, y2)
            # Store the ground-truth box in xywh format (center x, center y, width, height)
            gt_bbox[i] = [(x1+x2)/2.0, (y1+y2)/2.0, x2-x1+1., y2-y1+1.]
            is_crowd[i] = 0
            difficult[i] = _difficult

        voc_rec = {
            'im_file': img_file,
            'im_id': im_id,
            'h': im_h,
            'w': im_w,
            'is_crowd': is_crowd,
            'gt_class': gt_class,
            'gt_bbox': gt_bbox,
            'gt_poly': [],
            'difficult': difficult
        }
        if len(objs) != 0:
            records.append(voc_rec)
        ct += 1
    return records


def create_ssd(input_shape):
    # Input layer
    inputs = keras.Input(shape=input_shape)

    # VGG16 backbone
    base_model = keras.applications.VGG16(
        weights="imagenet", include_top=False, input_tensor=inputs)

    # Add extra convolutional layers
    x = base_model.output
    x = layers.Conv2D(256, 3, padding="same", activation="relu")(x)
    x = layers.Conv2D(256, 3, padding="same", activation="relu")(x)
    x = layers.Conv2D(128, 3, padding="same", activation="relu")(x)
    x = layers.Conv2D(128, 3, padding="same", activation="relu")(x)

    # Add detection heads
    num_classes = 2  
    num_boxes = 4 

    classification_outputs = layers.Conv2D(num_classes, 3, padding="same")(x)
    classification_outputs = layers.Reshape(
        (-1, num_classes))(classification_outputs)
    classification_outputs = layers.Activation(
        "softmax", name="classification")(classification_outputs)

    regression_outputs = layers.Conv2D(num_boxes * 4, 3, padding="same")(x)
    regression_outputs = layers.Reshape((-1, num_boxes, 4))(regression_outputs)
    regression_outputs = layers.Activation(
        "linear", name="regression")(regression_outputs)

    # Build the model
    model = keras.Model(inputs=inputs, outputs=[
                        classification_outputs, regression_outputs], name="SSD")
    return model

def drawRec(num):
    pic = trainImages[num]
    colorDict = {0:(0,0,255),1:(255,0,0)}
    colors = [colorDict[i] for i in list(trainRecords[num]["gt_class"])]
    x = 0
    for gt_bbox in list(trainRecords[num]["gt_bbox"]):
        color = colors[x]
        x+=1
        cv2.rectangle(pic,(int(gt_bbox[0]-gt_bbox[2]/2), int(gt_bbox[1]-gt_bbox[3]/2)),
                          (int(gt_bbox[0]+gt_bbox[2]/2), int(gt_bbox[1]+gt_bbox[3]/2)),color,2)
    return pic

if __name__ == "__main__":
    cname = {"book": 0, "bottle": 1}
    trainRecords = get_annotations(cname, datadir="E:\\ret\\train")
    validRecords = get_annotations(cname, datadir="E:\\ret\\valid")

    trainImages = [cv2.imread(record["im_file"]) for record in trainRecords]
    validImages = [cv2.imread(record["im_file"]) for record in validRecords]

    trainLabels = [dlabel["gt_class"] for dlabel in trainRecords]
    validLabels = [dlabel["gt_class"] for dlabel in validRecords]

    trainBboxes = [dbbox["gt_bbox"] for dbbox in trainRecords]
    validBboxes = [dbbox["gt_bbox"] for dbbox in validRecords]

    # check
    """
    num = np.random.randint(0,174)
    pic = drawRec(num)
    while True:
        cv2.imshow("pic",pic)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        elif (key & 0xFF == ord('.')) and (num<174):
            num+=1
            pic = drawRec(num)
        elif (key & 0xFF == ord(',')) and (num>-175):
            num -= 1
        pic = drawRec(num)
    cv2.destroyAllWindows()

    trainImages = np.array(trainImages)
    validImages = np.array(validImages)
    print(np.shape(trainImages))
    print(trainImages[num])
    print(trainLabels[num])
    print(trainBboxes[num])
    
    """
    for i in range(len(trainLabels)):
        while len(trainLabels[i]) <4:
            trainLabels[i] = np.append(trainLabels[i],-1)
    for i in range(len(trainBboxes)):
        while len(trainBboxes[i]) <4:
            x = list(trainBboxes[i])
            x.append(np.array([0.,0.,0.,0.]))
            trainBboxes[i] = np.array(x)
            
    for i in range(len(validLabels)):
        while len(validLabels[i]) <4:
            validLabels[i] = np.append(validLabels[i],-1)
    for i in range(len(validBboxes)):
        while len(validBboxes[i]) <4:
            x = list(validBboxes[i])
            x.append(np.array([0.,0.,0.,0.]))
            validBboxes[i] = np.array(x)
            
            
    print(np.shape(trainLabels))
    print(np.shape(trainImages))
#    print(trainLabels[num])
    print(np.shape(trainBboxes))
#    print(trainBboxes[num])
    
    model = create_ssd((480, 720, 3))
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
                  loss={"classification": "categorical_crossentropy",
                        "regression": "mse"},
                  metrics={"classification": "accuracy"})
    
    model.fit(trainImages, {"classification": trainLabels, "regression": trainBboxes},
                validation_data=(validImages, {"classification": validLabels, "regression": validBboxes}),
                batch_size=8, epochs=10)


    model.save_weights(filepath="model.h5", save_format="h5")


python keras object-detection
1 Answer

0 votes

It looks like you are trying to train an object detection model with Keras. The code snippet you provided shows how the fit() function is used to train a model with both a classification and a regression output. Here is a breakdown of the arguments passed to fit(); a sketch of how the inputs can be stacked into arrays follows the list:

  • trainImages: a NumPy array of training images with shape (175, 480, 720, 3), i.e. 175 images, each 480 pixels high and 720 pixels wide with 3 channels (RGB).
  • {"classification": trainLabels, "regression": trainBboxes}: a dictionary whose keys are the names of your model's output layers (presumably "classification" and "regression") and whose values are the corresponding training targets. trainLabels is a NumPy array of shape (175, 4) holding the classification labels for each image, and trainBboxes is a NumPy array of shape (175, 4, 4) holding the regression targets for each image.
  • validation_data: the validation data used during training. validImages holds the validation images, and {"classification": validLabels, "regression": validBboxes} holds the corresponding validation targets.
  • batch_size: the number of samples per gradient update; here you use a batch size of 8.
  • epochs: the number of epochs (full passes over the training dataset) to train for.
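The error message in the question suggests that the data reached fit() as Python lists rather than as stacked NumPy arrays: when x is a list of 175 arrays of shape (480, 720, 3), Keras treats each array as a separate input whose sample count is its first dimension (480), which conflicts with the per-image label arrays of length 4. Stacking each list into a single array before calling fit() gives every input and target an unambiguous sample dimension of 175. A minimal sketch, reusing the variable names from the question and assuming all images share the same 480 x 720 x 3 shape and the labels/boxes were already padded to 4 entries per image:

import numpy as np

# Stack the per-image lists into single arrays so that Keras sees 175 samples.
trainImages = np.stack(trainImages)
validImages = np.stack(validImages)
trainLabels = np.stack(trainLabels)
validLabels = np.stack(validLabels)
trainBboxes = np.stack(trainBboxes)
validBboxes = np.stack(validBboxes)

print(trainImages.shape)  # (175, 480, 720, 3)
print(trainLabels.shape)  # (175, 4)
print(trainBboxes.shape)  # (175, 4, 4)

model.fit(trainImages, {"classification": trainLabels, "regression": trainBboxes},
          validation_data=(validImages, {"classification": validLabels, "regression": validBboxes}),
          batch_size=8, epochs=10)

Note that this only removes the cardinality error; the model's output shapes (set by the Reshape layers in create_ssd) and the chosen losses must still match these target shapes, which is a separate issue.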