我是深度学习的新手,但我想成为一名专业人士。好像没有外部指导,这很难做到:-)
我正在尝试将基于本文https://www.youtube.com/watch?v=azM57JuQpQI&t=23s的这种方法https://www.depends-on-the-definition.com/unet-keras-segmenting-images/用于我使用带有Keras的UNET进行卫星图像分割的任务。
这是我的网络训练代码
import tensorflow as tf
import os
import random
import numpy as np
from tqdm import tqdm
import cv2
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from skimage.io import imread, imshow
from skimage.transform import resize
import matplotlib.pyplot as plt
os.environ['KERAS_BACKEND'] = 'tensorflow'
seed = 42
np.random.seed = seed
IMAGE_HEIGHT = 256
IMAGE_WIDTH = 256
IMAGE_CHANELS = 3
trainImageFolderPath = os.path.join(os.path.dirname(__file__), 'Bright Dunes Groups')
train_ids = next(os.walk(trainImageFolderPath))[1]
X_train = np.zeros((len(train_ids), IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANELS), dtype=np.uint8)
Y_train = np.zeros((len(train_ids), IMAGE_HEIGHT, IMAGE_WIDTH, 1), dtype=np.bool)
print('Building training set...')
for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
path = os.path.join(trainImageFolderPath, id_)
imagePath = os.path.join(path, id_ + '.jpg')
img = cv2.imread(imagePath)
img = resize(img, (IMAGE_HEIGHT, IMAGE_WIDTH), mode='constant', preserve_range=True)
X_train[n] = img
mask = np.zeros((IMAGE_HEIGHT, IMAGE_WIDTH, 1), dtype=np.bool)
for mask_file in next(os.walk(os.path.join(path, 'masks')))[2]:
maskPath = os.path.join(path, 'masks', mask_file)
mask_ = cv2.imread(maskPath, cv2.IMREAD_GRAYSCALE)
mask_ = resize(mask_, (IMAGE_HEIGHT, IMAGE_WIDTH), mode='constant', preserve_range=True)
mask_ = np.expand_dims(mask_, axis=-1)
mask = np.maximum(mask, mask_)
Y_train[n] = mask
inputs = tf.keras.layers.Input((IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANELS))
s = tf.keras.layers.Lambda(lambda x: x/255)(inputs)
c1 = tf.keras.layers.Conv2D(16, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(s)
c1 = tf.keras.layers.Dropout(0.1)(c1)
c1 = tf.keras.layers.Conv2D(16, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(c1)
p1 = tf.keras.layers.MaxPooling2D((2,2))(c1)
c2 = tf.keras.layers.Conv2D(32, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(p1)
c2 = tf.keras.layers.Dropout(0.1)(c2)
c2 = tf.keras.layers.Conv2D(32, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(c2)
p2 = tf.keras.layers.MaxPooling2D((2,2))(c2)
c3 = tf.keras.layers.Conv2D(64, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(p2)
c3 = tf.keras.layers.Dropout(0.2)(c3)
c3 = tf.keras.layers.Conv2D(64, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(c3)
p3 = tf.keras.layers.MaxPooling2D((2,2))(c3)
c4 = tf.keras.layers.Conv2D(128, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(p3)
c4 = tf.keras.layers.Dropout(0.2)(c4)
c4 = tf.keras.layers.Conv2D(128, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(c4)
p4 = tf.keras.layers.MaxPooling2D((2,2))(c4)
c5 = tf.keras.layers.Conv2D(256, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(p4)
c5 = tf.keras.layers.Dropout(0.3)(c5)
c5 = tf.keras.layers.Conv2D(256, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(c5)
u6 = tf.keras.layers.Conv2DTranspose(128, (2,2), strides=(2,2), padding='same')(c5)
u6 = tf.keras.layers.concatenate([u6, c4])
c6 = tf.keras.layers.Conv2D(128, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(u6)
c6 = tf.keras.layers.Dropout(0.2)(c6)
c6 = tf.keras.layers.Conv2D(128, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(c6)
u7 = tf.keras.layers.Conv2DTranspose(64, (2,2), strides=(2,2), padding='same')(c6)
u7 = tf.keras.layers.concatenate([u7, c3])
c7 = tf.keras.layers.Conv2D(64, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(u7)
c7 = tf.keras.layers.Dropout(0.2)(c7)
c7 = tf.keras.layers.Conv2D(64, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(c7)
u8 = tf.keras.layers.Conv2DTranspose(32, (2,2), strides=(2,2), padding='same')(c7)
u8 = tf.keras.layers.concatenate([u8, c2])
c8 = tf.keras.layers.Conv2D(32, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(u8)
c8 = tf.keras.layers.Dropout(0.1)(c8)
c8 = tf.keras.layers.Conv2D(32, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(c8)
u9 = tf.keras.layers.Conv2DTranspose(16, (2,2), strides=(2,2), padding='same')(c8)
u9 = tf.keras.layers.concatenate([u9, c1], axis=3)
c9 = tf.keras.layers.Conv2D(16, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(u9)
c9 = tf.keras.layers.Dropout(0.1)(c9)
c9 = tf.keras.layers.Conv2D(16, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(c9)
outputs = tf.keras.layers.Conv2D(1, (1,1), activation='sigmoid')(c9)
model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
model.summary()
######################################################################################################
callbacks = [
EarlyStopping(patience=5, verbose=1),
ReduceLROnPlateau(factor=0.2, patience=3, min_lr=0.001, verbose=1),
ModelCheckpoint('bright_Dunes_Groups_model.h5', verbose=1, save_best_only=True)
]
######################################################################################################
results = model.fit(X_train,
Y_train,
validation_split=0.25,
batch_size=16,
epochs=80,
callbacks = callbacks)
###############################################################
plt.figure(figsize=(8, 8))
plt.title("Learning curve")
plt.plot(results.history["loss"], label="loss")
plt.plot(results.history["val_loss"], label="val_loss")
plt.plot( np.argmin(results.history["val_loss"]), np.min(results.history["val_loss"]), marker="x", color="r", label="best model")
plt.xlabel("Epochs")
plt.ylabel("log_loss")
plt.legend()
###############################################################
idx = random.randint(0, len(X_train))
predictions_train = model.predict(X_train[:int(X_train.shape[0]*0.9)], verbose=1)
predictions_value = model.predict(X_train[int(X_train.shape[0]*0.9):], verbose=1)
predictions_train_t = (predictions_train > 0.5).astype(np.uint8)
predictions_value_t = (predictions_value > 0.5).astype(np.uint8)
###### random training sample
ix = random.randint(0, len(predictions_train_t))
imshow(X_train[ix])
plt.show()
imshow(np.squeeze(Y_train[ix]))
plt.show()
imshow(np.squeeze(predictions_train_t[ix]))
plt.show()
###### random validation sample
ix = random.randint(0, len(predictions_value_t))
imshow(X_train[int(X_train.shape[0]*0.9):][ix])
plt.show()
imshow(np.squeeze(Y_train[int(Y_train.shape[0]*0.9):][ix]))
plt.show()
imshow(np.squeeze(predictions_value_t[ix]))
plt.show()
我的trainig数据集包含141个图像(X_train)和每个图像的一组遮罩。我知道,这是少量数据,但即使从该数据集中,我也希望至少有一些东西。准确性超过75%,但是当我尝试使用下面的代码进行测试时,结果却很糟糕。
import tensorflow as tf
import os
import random
import numpy as np
from tqdm import tqdm
import cv2
from skimage.io import imread, imshow
from skimage.transform import resize
import matplotlib.pyplot as plt
os.environ['KERAS_BACKEND'] = 'tensorflow'
IMAGE_HEIGHT = 256
IMAGE_WIDTH = 256
IMAGE_CHANELS = 3
modelFilePath = 'bright_Dunes_Groups_model.h5'
model = tf.keras.models.load_model(modelFilePath)
testImageFolderPath = os.path.join(os.path.dirname(__file__), 'TestDunes')
test_ids = next(os.walk(testImageFolderPath))[2]
for n, id_ in tqdm(enumerate(test_ids), total=len(test_ids)):
imagePath = os.path.join(testImageFolderPath, id_)
img = cv2.imread(imagePath)
img = cv2.resize(img, (IMAGE_HEIGHT, IMAGE_WIDTH))
imshow(img)
plt.show()
img = np.expand_dims(img, axis=0)
predictions = model.predict(img, verbose=1)
predictions_value_t = (predictions > 0.33).astype(np.uint8)
imshow(np.squeeze(predictions_value_t))
plt.show()
预测非常糟糕。
因此,我怀疑至少有2个地方中的1个有问题:1)在代码中2)在数据集中
我的X_train的样本之一是这个
对应的蒙版看起来像这样
每个X_train可以具有多个蒙版,但是对于此特定图像,只有一个蒙版。
每个X_train图像的尺寸为227 * 227像素。在代码中,我将其大小(和蒙版)调整为256 * 256
对于每个X_train图像,我手动执行数据增强(我将X_train和相应的遮罩旋转90、180、270度并水平和垂直翻转)。正如我提到的,所有加在一起的增强数据都给了我141个X_train图像。
检查数据集是否存在问题的一种方法是生成更多的X_train和掩码。但是,问题在于这是一个手动过程并且非常耗时。因此,在执行此操作之前,我想知道我的解决方案还有什么问题。
我也怀疑是问题的根源之一,口罩的形状可能完全不同。蒙版的形状不均匀会造成这种不良的预测结果吗?
我看到了一些起点(不是解决办法,但是)仔细检查了您的代码/数据集。我在下面总结它们:
skimage.transform.resize
调整训练数据的大小,但正在使用cv2.resize
调整测试数据的大小。我会注意的确保他们两个都一样。某些函数在调整大小时会缩放值的范围。希望有帮助。祝你好运。