我有两张图像,
E1
和E3
,我正在训练 CNN 模型。
为了训练模型,我使用
E1
作为训练,使用 E3
作为 y_train。
我从这些图像中提取图块,以便在图块上训练模型。
该模型没有激活层,因此输出可以取任何值。
因此,例如
preds
的预测值约为 preds.max() = 2.35
和 preds.min() = -1.77
。
我的问题是我无法在最后使用
preds
重建图像,我认为问题在于 preds
值的缩放-取消缩放。
如果我只是这样做
np.uint8(preds)
它几乎全是零,因为 preds
的值很小。
图像应尽可能接近
E2
图像。
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, \
Input, Add
from tensorflow.keras.models import Model
from PIL import Image
CHANNELS = 1
HEIGHT = 32
WIDTH = 32
INIT_SIZE = ((1429, 1416))
def NormalizeData(data):
return (data - np.min(data)) / (np.max(data) - np.min(data) + 1e-6)
def extract_image_tiles(size, im):
im = im[:, :, :CHANNELS]
w = h = size
idxs = [(i, (i + h), j, (j + w)) for i in range(0, im.shape[0], h) for j in range(0, im.shape[1], w)]
tiles_asarrays = []
count = 0
for k, (i_start, i_end, j_start, j_end) in enumerate(idxs):
tile = im[i_start:i_end, j_start:j_end, ...]
if tile.shape[:2] != (h, w):
tile_ = tile
tile_size = (h, w) if tile.ndim == 2 else (h, w, tile.shape[2])
tile = np.zeros(tile_size, dtype=tile.dtype)
tile[:tile_.shape[0], :tile_.shape[1], ...] = tile_
count += 1
tiles_asarrays.append(tile)
return np.array(idxs), np.array(tiles_asarrays)
def build_model(height, width, channels):
inputs = Input((height, width, channels))
f1 = Conv2D(32, 3, padding='same')(inputs)
f1 = BatchNormalization()(f1)
f1 = Activation('relu')(f1)
f2 = Conv2D(16, 3, padding='same')(f1)
f2 = BatchNormalization()(f2)
f2 = Activation('relu')(f2)
f3 = Conv2D(16, 3, padding='same')(f2)
f3 = BatchNormalization()(f3)
f3 = Activation('relu')(f3)
addition = Add()([f2, f3])
f4 = Conv2D(32, 3, padding='same')(addition)
f5 = Conv2D(16, 3, padding='same')(f4)
f5 = BatchNormalization()(f5)
f5 = Activation('relu')(f5)
f6 = Conv2D(16, 3, padding='same')(f5)
f6 = BatchNormalization()(f6)
f6 = Activation('relu')(f6)
output = Conv2D(1, 1, padding='same')(f6)
model = Model(inputs, output)
return model
# Load data
img = cv2.imread('E1.tif', cv2.IMREAD_UNCHANGED)
img = cv2.resize(img, (1408, 1408), interpolation=cv2.INTER_AREA)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.array(img, np.uint8)
#plt.imshow(img)
img3 = cv2.imread('E3.tif', cv2.IMREAD_UNCHANGED)
img3 = cv2.resize(img3, (1408, 1408), interpolation=cv2.INTER_AREA)
img3 = cv2.cvtColor(img3, cv2.COLOR_BGR2RGB)
img3 = np.array(img3, np.uint8)
# extract tiles from images
idxs, tiles = extract_image_tiles(WIDTH, img)
idxs2, tiles3 = extract_image_tiles(WIDTH, img3)
# split to train and test data
split_idx = int(tiles.shape[0] * 0.9)
train = tiles[:split_idx]
val = tiles[split_idx:]
y_train = tiles3[:split_idx]
y_val = tiles3[split_idx:]
# build model
model = build_model(HEIGHT, WIDTH, CHANNELS)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
loss = tf.keras.losses.Huber(),
metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
# scale data before training
train = train / 255.
val = val / 255.
y_train = y_train / 255.
y_val = y_val / 255.
# train
history = model.fit(train,
y_train,
validation_data=(val, y_val),
epochs=50)
# predict on E2
img2 = cv2.imread('E2.tif', cv2.IMREAD_UNCHANGED)
img2 = cv2.resize(img2, (1408, 1408), interpolation=cv2.INTER_AREA)
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
img2 = np.array(img2, np.uint8)
# extract tiles from images
idxs, tiles2 = extract_image_tiles(WIDTH, img2)
#scale data
tiles2 = tiles2 / 255.
preds = model.predict(tiles2)
#preds = NormalizeData(preds)
#preds = np.uint8(preds)
# reconstruct predictions
reconstructed = np.zeros((img.shape[0],
img.shape[1]),
dtype=np.uint8)
# reconstruction process
for tile, (y_start, y_end, x_start, x_end) in zip(preds[:, :, -1], idxs):
y_end = min(y_end, img.shape[0])
x_end = min(x_end, img.shape[1])
reconstructed[y_start:y_end, x_start:x_end] = tile[:(y_end - y_start), :(x_end - x_start)]
im = Image.fromarray(reconstructed)
im = im.resize(INIT_SIZE)
im.show()
您可以在这里找到数据
如果我使用:
def normalize_arr_to_uint8(arr):
the_min = arr.min()
the_max = arr.max()
the_max -= the_min
arr = ((arr - the_min) / the_max) * 255.
return arr.astype(np.uint8)
preds = model.predict(tiles2)
preds = normalize_arr_to_uint8(preds)
然后,我收到一张看起来正确的图像,但到处都是线条。
这是我得到的图像:
这是我应该拍摄的图像(尽可能接近
E2
)。请注意,我仅在本示例中使用小型 cnn 网络,因此我无法接收图像的太多详细信息。但是,当我尝试更好的模型时,我仍然有水平和/或垂直线:
更新
我找到了这个。
在上面的代码中,我使用的是:
# reconstruction process
for tile, (y_start, y_end, x_start, x_end) in zip(preds[:, :, -1], idxs):
preds[:, :, -1]
这是错误的。
我必须使用
preds[:, :, :, -1]
,因为preds
形状是:(1936, 32, 32, 1)
。
所以,如果我使用
preds[:, :, -1]
我会收到我发布的图像。
如果我使用
preds[:, :, :, -1]
,这是正确的,我会收到一个新图像,除了水平线之外,我还得到垂直线!
更新2
我只是添加新代码,使用其他补丁和重建函数,但产生相同的结果(更好一点的图片)。
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, \
Input, Add
from tensorflow.keras.models import Model
from PIL import Image
# gpu setup
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
CHANNELS = 1
HEIGHT = 1408
WIDTH = 1408
PATCH_SIZE = 32
STRIDE = PATCH_SIZE//2
INIT_SIZE = ((1429, 1416))
def normalize_arr_to_uint8(arr):
the_min = arr.min()
the_max = arr.max()
the_max -= the_min
arr = ((arr - the_min) / the_max) * 255.
return arr.astype(np.uint8)
def NormalizeData(data):
return (data - np.min(data)) / (np.max(data) - np.min(data) + 1e-6)
def recon_im(patches: np.ndarray, im_h: int, im_w: int, n_channels: int, stride: int):
"""Reconstruct the image from all patches.
Patches are assumed to be square and overlapping depending on the stride. The image is constructed
by filling in the patches from left to right, top to bottom, averaging the overlapping parts.
Parameters
-----------
patches: 4D ndarray with shape (patch_number,patch_height,patch_width,channels)
Array containing extracted patches. If the patches contain colour information,
channels are indexed along the last dimension: RGB patches would
have `n_channels=3`.
im_h: int
original height of image to be reconstructed
im_w: int
original width of image to be reconstructed
n_channels: int
number of channels the image has. For RGB image, n_channels = 3
stride: int
desired patch stride
Returns
-----------
reconstructedim: ndarray with shape (height, width, channels)
or ndarray with shape (height, width) if output image only has one channel
Reconstructed image from the given patches
"""
patch_size = patches.shape[1] # patches assumed to be square
# Assign output image shape based on patch sizes
rows = ((im_h - patch_size) // stride) * stride + patch_size
cols = ((im_w - patch_size) // stride) * stride + patch_size
if n_channels == 1:
reconim = np.zeros((rows, cols))
divim = np.zeros((rows, cols))
else:
reconim = np.zeros((rows, cols, n_channels))
divim = np.zeros((rows, cols, n_channels))
p_c = (cols - patch_size + stride) / stride # number of patches needed to fill out a row
totpatches = patches.shape[0]
initr, initc = 0, 0
# extract each patch and place in the zero matrix and sum it with existing pixel values
reconim[initr:patch_size, initc:patch_size] = patches[0]# fill out top left corner using first patch
divim[initr:patch_size, initc:patch_size] = np.ones(patches[0].shape)
patch_num = 1
while patch_num <= totpatches - 1:
initc = initc + stride
reconim[initr:initr + patch_size, initc:patch_size + initc] += patches[patch_num]
divim[initr:initr + patch_size, initc:patch_size + initc] += np.ones(patches[patch_num].shape)
if np.remainder(patch_num + 1, p_c) == 0 and patch_num < totpatches - 1:
initr = initr + stride
initc = 0
reconim[initr:initr + patch_size, initc:patch_size] += patches[patch_num + 1]
divim[initr:initr + patch_size, initc:patch_size] += np.ones(patches[patch_num].shape)
patch_num += 1
patch_num += 1
# Average out pixel values
reconstructedim = reconim / divim
return reconstructedim
def get_patches(GT, stride, patch_size):
"""Extracts square patches from an image of any size.
Parameters
-----------
GT : ndarray
n-dimensional array containing the image from which patches are to be extracted
stride : int
desired patch stride
patch_size : int
patch size
Returns
-----------
patches: ndarray
array containing all patches
im_h: int
height of image to be reconstructed
im_w: int
width of image to be reconstructed
n_channels: int
number of channels the image has. For RGB image, n_channels = 3
"""
hr_patches = []
for i in range(0, GT.shape[0] - patch_size + 1, stride):
for j in range(0, GT.shape[1] - patch_size + 1, stride):
hr_patches.append(GT[i:i + patch_size, j:j + patch_size])
im_h, im_w = GT.shape[0], GT.shape[1]
if len(GT.shape) == 2:
n_channels = 1
else:
n_channels = GT.shape[2]
patches = np.asarray(hr_patches)
return patches, im_h, im_w, n_channels
def build_model(height, width, channels):
inputs = Input((height, width, channels))
f1 = Conv2D(32, 3, padding='same')(inputs)
f1 = BatchNormalization()(f1)
f1 = Activation('relu')(f1)
f2 = Conv2D(16, 3, padding='same')(f1)
f2 = BatchNormalization()(f2)
f2 = Activation('relu')(f2)
f3 = Conv2D(16, 3, padding='same')(f2)
f3 = BatchNormalization()(f3)
f3 = Activation('relu')(f3)
addition = Add()([f2, f3])
f4 = Conv2D(32, 3, padding='same')(addition)
f5 = Conv2D(16, 3, padding='same')(f4)
f5 = BatchNormalization()(f5)
f5 = Activation('relu')(f5)
f6 = Conv2D(16, 3, padding='same')(f5)
f6 = BatchNormalization()(f6)
f6 = Activation('relu')(f6)
output = Conv2D(1, 1, padding='same')(f6)
model = Model(inputs, output)
return model
# Load data
img = cv2.imread('E1.tif', cv2.IMREAD_UNCHANGED)
img = cv2.resize(img, (HEIGHT, WIDTH), interpolation=cv2.INTER_AREA)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.array(img, np.uint8)
img3 = cv2.imread('E3.tif', cv2.IMREAD_UNCHANGED)
img3 = cv2.resize(img3, (HEIGHT, WIDTH), interpolation=cv2.INTER_AREA)
img3 = cv2.cvtColor(img3, cv2.COLOR_BGR2RGB)
img3 = np.array(img3, np.uint8)
# extract tiles from images
tiles, H, W, C = get_patches(img[:, :, :CHANNELS], stride=STRIDE, patch_size=PATCH_SIZE)
tiles3, H, W, C = get_patches(img3[:, :, :CHANNELS], stride=STRIDE, patch_size=PATCH_SIZE)
# split to train and test data
split_idx = int(tiles.shape[0] * 0.9)
train = tiles[:split_idx]
val = tiles[split_idx:]
y_train = tiles3[:split_idx]
y_val = tiles3[split_idx:]
# build model
model = build_model(PATCH_SIZE, PATCH_SIZE, CHANNELS)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
loss = tf.keras.losses.Huber(),
metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
# scale data before training
train = train / 255.
val = val / 255.
y_train = y_train / 255.
y_val = y_val / 255.
# train
history = model.fit(train,
y_train,
validation_data=(val, y_val),
batch_size=16,
epochs=20)
# predict on E2
img2 = cv2.imread('E2.tif', cv2.IMREAD_UNCHANGED)
img2 = cv2.resize(img2, (HEIGHT, WIDTH), interpolation=cv2.INTER_AREA)
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
img2 = np.array(img2, np.uint8)
# extract tiles from images
tiles2, H, W, CHANNELS = get_patches(img2[:, :, :CHANNELS], stride=STRIDE, patch_size=PATCH_SIZE)
#scale data
tiles2 = tiles2 / 255.
preds = model.predict(tiles2)
preds = normalize_arr_to_uint8(preds)
reconstructed = recon_im(preds[:, :, :, -1], HEIGHT, WIDTH, CHANNELS, stride=STRIDE)
im = Image.fromarray(reconstructed)
im = im.resize(INIT_SIZE)
im.show()
生成的图像:
所以我认为这主要是关于数据的后处理和可视化:
我现在这样想象:
# predict on E2
img2 = cv2.imread('images/E2.tif', cv2.IMREAD_UNCHANGED)
img2 = cv2.resize(img2, (1408, 1408), interpolation=cv2.INTER_AREA)
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
img2 = np.array(img2, np.uint8)
# extract tiles from images
idxs, tiles2 = extract_image_tiles(WIDTH, img2)
# scale data
tiles2 = tiles2 / 255.
preds = model.predict(tiles2)
# Check model output range
print("Max prediction value:", np.max(preds))
print("Min prediction value:", np.min(preds))
# Invert colors in predictions
inverted_preds = 1.0 - preds
# Ensure values are within valid range
inverted_preds = np.clip(inverted_preds, 0, 1)
# Reconstruct inverted predictions
reconstructed = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)
# Reconstruction process
for tile, (y_start, y_end, x_start, x_end) in zip(inverted_preds[:, :, :, -1], idxs):
y_end = min(y_end, img.shape[0])
x_end = min(x_end, img.shape[1])
reconstructed[y_start:y_end, x_start:x_end] = (tile * 255).astype(np.uint8)
im = Image.fromarray(reconstructed)
im = im.resize(INIT_SIZE)
im.show()
我已经可以清楚地看到黑色方块了。所以接下来我将尝试一些阈值来增强后期处理中的可见性。
我在想类似的事情:
# Threshold value (adjust as needed)
threshold = 0.9 #0.45
# Thresholding
binary_output = (reconstructed >= threshold).astype(np.uint8) * 255
# Second visualization
im_binary = Image.fromarray(binary_output)
im_binary = im_binary.resize(INIT_SIZE)
im_binary.show()
这给我留下了这个:
不确定这在整个数据集上的扩展效果如何,但这对于后处理中的某些形态运算符来说绝对是可行的。