我现在正在编写导入 Keras 的对象检测 python 代码 这是一组形状为(175,480,720,3)的图像,图像数量为175。 还有一组形状为 (175, 4) 的标签与图像对应,图像中的每个对象都对应一个标签。边界框 (175, 4, 4) 是相同的。
model.fit(trainImages, {"classification": trainLabels, "regression": trainBboxes},
validation_data=(validImages, {"classification": validLabels, "regression": validBboxes}),
batch_size=8, epochs=10)
我将它们输入到我的神经网络中,但它显示错误
ValueError:数据基数不明确。确保所有数组包含相同数量的样本。“x”大小:480, 480, ... 480, 480(175) “y”尺寸:4, 4, ...4, 4(175)
我将 np 数组转换为 list ,但什么也没发生。 完整代码如下:
import os
import numpy as np
import xml.etree.ElementTree as ET
import cv2
import tensorflow as tf
from keras import layers
import keras
print("模块导入完成")
def get_annotations(cname2cid, datadir):
filenames = os.listdir(os.path.join(datadir, 'annotations', 'xml'))
records = []
ct = 0
for fname in filenames:
fid = fname.split('.')[0]
fpath = os.path.join(datadir, 'annotations', 'xml', fname)
img_file = os.path.join(datadir, 'images', fid + '.png')
tree = ET.parse(fpath)
if tree.find('id') is None:
im_id = np.array([ct])
else:
im_id = np.array([int(tree.find('id').text)])
objs = tree.findall('object')
im_w = float(tree.find('size').find('width').text)
im_h = float(tree.find('size').find('height').text)
gt_bbox = np.zeros((len(objs), 4), dtype=np.float32)
gt_class = np.zeros((len(objs), ), dtype=np.int32)
is_crowd = np.zeros((len(objs), ), dtype=np.int32)
difficult = np.zeros((len(objs), ), dtype=np.int32)
for i, obj in enumerate(objs):
cname = obj.find('name').text
gt_class[i] = cname2cid[cname]
_difficult = int(obj.find('difficult').text)
x1 = float(obj.find('bndbox').find('xmin').text)
y1 = float(obj.find('bndbox').find('ymin').text)
x2 = float(obj.find('bndbox').find('xmax').text)
y2 = float(obj.find('bndbox').find('ymax').text)
x1 = max(0, x1)
y1 = max(0, y1)
x2 = min(im_w - 1, x2)
y2 = min(im_h - 1, y2)
# 这里使用xywh格式来表示目标物体真实框
gt_bbox[i] = [(x1+x2)/2.0, (y1+y2)/2.0, x2-x1+1., y2-y1+1.]
is_crowd[i] = 0
difficult[i] = _difficult
voc_rec = {
'im_file': img_file,
'im_id': im_id,
'h': im_h,
'w': im_w,
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_poly': [],
'difficult': difficult
}
if len(objs) != 0:
records.append(voc_rec)
ct += 1
return records
def create_ssd(input_shape):
# 输入层
inputs = keras.Input(shape=input_shape)
# VGG16 主干网络
base_model = keras.applications.VGG16(
weights="imagenet", include_top=False, input_tensor=inputs)
# 添加额外的卷积层和池化层
x = base_model.output
x = layers.Conv2D(256, 3, padding="same", activation="relu")(x)
x = layers.Conv2D(256, 3, padding="same", activation="relu")(x)
x = layers.Conv2D(128, 3, padding="same", activation="relu")(x)
x = layers.Conv2D(128, 3, padding="same", activation="relu")(x)
# 添加检测头
num_classes = 2
num_boxes = 4
classification_outputs = layers.Conv2D(num_classes, 3, padding="same")(x)
classification_outputs = layers.Reshape(
(-1, num_classes))(classification_outputs)
classification_outputs = layers.Activation(
"softmax", name="classification")(classification_outputs)
regression_outputs = layers.Conv2D(num_boxes * 4, 3, padding="same")(x)
regression_outputs = layers.Reshape((-1, num_boxes, 4))(regression_outputs)
regression_outputs = layers.Activation(
"linear", name="regression")(regression_outputs)
# 创建模型
model = keras.Model(inputs=inputs, outputs=[
classification_outputs, regression_outputs], name="SSD")
return model
def drawRec(num):
pic = trainImages[num]
colorDict = {0:(0,0,255),1:(255,0,0)}
colors = [colorDict[i] for i in list(trainRecords[num]["gt_class"])]
x = 0
for gt_bbox in list(trainRecords[num]["gt_bbox"]):
color = colors[x]
x+=1
cv2.rectangle(pic,(int(gt_bbox[0]-gt_bbox[2]/2), int(gt_bbox[1]-gt_bbox[3]/2)),
(int(gt_bbox[0]+gt_bbox[2]/2), int(gt_bbox[1]+gt_bbox[3]/2)),color,2)
return pic
if __name__ == "__main__":
cname = {"book": 0, "bottle": 1}
trainRecords = get_annotations(cname, datadir="E:\\ret\\train")
validRecords = get_annotations(cname, datadir="E:\\ret\\valid")
trainImages = [cv2.imread(record["im_file"]) for record in trainRecords]
validImages = [cv2.imread(record["im_file"]) for record in validRecords]
trainLabels = [dlabel["gt_class"] for dlabel in trainRecords]
validLabels = [dlabel["gt_class"] for dlabel in validRecords]
trainBboxes = [dbbox["gt_bbox"] for dbbox in trainRecords]
validBboxes = [dbbox["gt_bbox"] for dbbox in validRecords]
# check
"""
num = np.random.randint(0,174)
pic = drawRec(num)
while True:
cv2.imshow("pic",pic)
key = cv2.waitKey(1)
if key & 0xFF == ord('q'):
break
elif (key & 0xFF == ord('.')) and (num<174):
num+=1
pic = drawRec(num)
elif (key & 0xFF == ord(',')) and (num>-175):
num -= 1
pic = drawRec(num)
cv2.destroyAllWindows()
trainImages = np.array(trainImages)
validImages = np.array(validImages)
print(np.shape(trainImages))
print(trainImages[num])
print(trainLabels[num])
print(trainBboxes[num])
"""
for i in range(len(trainLabels)):
while len(trainLabels[i]) <4:
trainLabels[i] = np.append(trainLabels[i],-1)
for i in range(len(trainBboxes)):
while len(trainBboxes[i]) <4:
x = list(trainBboxes[i])
x.append(np.array([0.,0.,0.,0.]))
trainBboxes[i] = np.array(x)
for i in range(len(validLabels)):
while len(validLabels[i]) <4:
validLabels[i] = np.append(validLabels[i],-1)
for i in range(len(validBboxes)):
while len(validBboxes[i]) <4:
x = list(validBboxes[i])
x.append(np.array([0.,0.,0.,0.]))
validBboxes[i] = np.array(x)
print(np.shape(trainLabels))
print(np.shape(trainImages))
# print(trainLabels[num])
print(np.shape(trainBboxes))
# print(trainBboxes[num])
model = create_ssd((480, 720, 3))
model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
loss={"classification": "categorical_crossentropy",
"regression": "mse"},
metrics={"classification": "accuracy"})
model.fit(trainImages, {"classification": trainLabels, "regression": trainBboxes},
validation_data=(validImages, {"classification": validLabels, "regression": validBboxes}),
batch_size=8, epochs=10)
model.save_weights(filepath="model.h5", save_format="h5")
您似乎正在尝试使用 Keras 训练用于对象检测的模型。您提供的代码片段演示了如何使用 fit() 函数来训练具有分类和回归任务的模型。 以下是传递给 fit() 函数的参数的详细信息: