(我知道这些广泛的“它不起作用”问题不是首选,但这是我解决问题的最后选择。)
我遵循了这个精彩的tutorial 使用 U-net 构建和训练语义分割模型。
它的任务是分割一个 出图像。这是一个 2.1 x 2.1 厘米的彩色小方块,将用于其他物体的颜色校准和长度计算。
我使用 775 张图像和真实掩码进行训练,其中 10% 用于验证。仅仅几个时期之后,直到总共 50 个时期,模型的交叉联合 (IoU) 值在训练和验证中都没有达到任何高于 0.49 的值。进行预测也会返回一个完整的黑色遮罩。
我知道我的数据集对于分割来说相对较小,但我从 200 张图像开始并且具有相同的性能。数据扩充也没有改变任何东西。
图像和蒙版的加载和预处理在笔记本中显示时应完美排列。附言。我的数据集包含分辨率为 3000x4000 但在训练时调整为 256 x 256 的图像。
我已经尝试增加我的数据集大小(200 到 775 imgs),更改数据增强参数、学习率、优化器函数、批量大小、时期、每个时期的步数..
IoU 百分比没有任何变化。
代码:
image_directory = "../data/patch_data/patch_images"
mask_directory = "../data/patch_data/patch_masks"
IMG_SIZE = 256
image_list = []
mask_list = []
for file in sorted(os.listdir(image_directory)):
img = cv2.imread(os.path.join(image_directory, file), 0)
img = cv2.resize(img, (IMG_SIZE, IMG_SIZE), interpolation = cv2.INTER_NEAREST)
image_list.append(img)
print(f"All {len(image_list)} images loaded!")
for file in sorted(os.listdir(mask_directory)):
img = cv2.imread(os.path.join(mask_directory, file), 0)
img = cv2.resize(img, (IMG_SIZE, IMG_SIZE), interpolation = cv2.INTER_NEAREST)
mask_list.append(img)
print(f"All {len(mask_list)} masks loaded!")
image_dataset = np.array(image_list, dtype='int32')
image_dataset = np.expand_dims(image_dataset, axis=3)
mask_dataset = np.array(mask_list, dtype='int32')
mask_dataset = np.expand_dims(mask_dataset, axis=3)
image_dataset = image_dataset /255.
mask_dataset = mask_dataset /255.
from sklearn.model_selection import train_test_split
img_train, img_test, mask_train, mask_test = train_test_split(image_dataset, mask_dataset, test_size = 0.10, random_state = 42)
from tensorflow.keras.preprocessing.image import ImageDataGenerator
data_gen_args = dict(
rotation_range=90,
zoom_range=0.2,
width_shift_range=0.1,
height_shift_range=0.1,
horizontal_flip=True,
vertical_flip=True
)
image_data_generator = ImageDataGenerator(**data_gen_args)
image_datagen = ImageDataGenerator(**data_gen_args)
mask_datagen = ImageDataGenerator(**data_gen_args)
seed = 1
image_datagen.fit(img_train, augment=True, seed=seed)
mask_datagen.fit(mask_train, augment=True, seed=seed)
image_generator = image_datagen.flow(
img_train,
batch_size=16,
seed=seed)
mask_generator = mask_datagen.flow(
mask_train,
batch_size=16,
seed=seed)
train_generator = zip(image_generator, mask_generator)
def conv_block(input, num_filters):
x = Conv2D(num_filters, 3, padding="same")(input)
x = BatchNormalization()(x) #Not in the original network.
x = Activation("relu")(x)
x = Conv2D(num_filters, 3, padding="same")(x)
x = BatchNormalization()(x) #Not in the original network
x = Activation("relu")(x)
return x
#Encoder block: Conv block followed by maxpooling
def encoder_block(input, num_filters):
x = conv_block(input, num_filters)
p = MaxPool2D((2, 2))(x)
return x, p
#Decoder block
#skip features gets input from encoder for concatenation
def decoder_block(input, skip_features, num_filters):
x = Conv2DTranspose(num_filters, (2, 2), strides=2, padding="same")(input)
x = Concatenate()([x, skip_features])
x = conv_block(x, num_filters)
return x
#Build Unet using the blocks
def build_unet(input_shape, num_output):
inputs = Input(input_shape)
s1, p1 = encoder_block(inputs, 64)
s2, p2 = encoder_block(p1, 128)
s3, p3 = encoder_block(p2, 256)
s4, p4 = encoder_block(p3, 512)
b1 = conv_block(p4, 1024) #Bridge
d1 = decoder_block(b1, s4, 512)
d2 = decoder_block(d1, s3, 256)
d3 = decoder_block(d2, s2, 128)
d4 = decoder_block(d3, s1, 64)
if num_output == 1: #Binary
activation = 'sigmoid'
else:
activation = 'softmax'
outputs = Conv2D(num_output, 1, padding="same", activation=activation)(d4) #Change the activation based on n_classes
print(activation)
model = Model(inputs, outputs, name="U-Net")
return model
IMG_HEIGHT = image_dataset.shape[1]
IMG_WIDTH = image_dataset.shape[2]
IMG_CHANNELS = image_dataset.shape[3]
input_shape = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)
model = build_unet(input_shape, num_output=1)
model.compile(optimizer=Adam(learning_rate = 1e-3), loss='binary_crossentropy', metrics=[tf.keras.metrics.MeanIoU(num_classes=2)])
model.summary()
history = model.fit(
#x=img_train,y=mask_train
train_generator,
batch_size = 16,
steps_per_epoch=100,
verbose=1,
epochs=50,
validation_data=(img_test, mask_test),
shuffle=False)