所以我正在 Tensorflow 中训练一个 Unet 风格网络的变体来解决我要解决的问题。我注意到一个有趣的模式/错误,我无法理解或修复。
因为我一直在训练这个网络,在张量板上训练损失大于验证损失,但验证指标非常低。(下)
但我一直在查看网络的输出数据,老实说,输出并没有出现“半坏”,至少不是 .25-.30 的 Dice
因此,当我通过重新加载模型并在验证集上进行预测来从外部验证 Dice 时,我得到了 > .90 的高骰子分数。
我觉得这是由于我的损失和使用的指标造成的,但我不确定如何进行。我的损失指标和外部验证指标代码块发布在下面。
损失等级
class sce_dsc(losses.Loss):
def __init__(self, scale_sce=1.0, scale_dsc=1.0, sample_weight = None, epsilon=0.01, name=None):
super(sce_dsc, self).__init__()
self.sce = losses.SparseCategoricalCrossentropy(from_logits=False) #while the last layer activation is sigmoid, logits needs to be false
self.epsilon = epsilon
self.scale_a = scale_sce
self.scale_b = scale_dsc
self.cls = 1
self.weights = sample_weight
def dsc(self, y_true, y_pred, sample_weight = None):
true = tf.cast(y_true[..., 0] == self.cls, tf.int64)
pred = tf.nn.softmax(y_pred, axis=-1)[..., self.cls]
if self.weights is not None:
#true = true * (sample_weight[...])
true = true & (sample_weight[...] !=0)
#pred = pred * (sample_weight[...])
pred = pred & (sample_weight[...] !=0)
A = tf.math.reduce_sum(tf.cast(true, tf.float32) * tf.cast(pred,tf.float32)) * 2
B = tf.cast(tf.math.reduce_sum(true), tf.float32) + tf.cast(tf.math.reduce_sum(pred),tf.float32) + self.epsilon
return (1.0 - A/B)
def call(self, y_true, y_pred):
sce_loss = self.sce(y_true=y_true, y_pred=y_pred, sample_weight=self.weights) * self.scale_a
dsc_loss = self.dsc(y_true=y_true, y_pred=y_pred, sample_weight=self.weights) * self.scale_b
loss = tf.cast(sce_loss, tf.float32) + tf.cast(dsc_loss,tf.float32)
#self.add_loss(loss)
return loss```
Metric Class
class custom_dice(keras.metrics.Metric):
def __init__(self, name = "dsc", **kwargs):
super(custom_dice,self).__init__(**kwargs)
self.dice = self.add_weight(name = 'dice_coef', initializer = 'zeros')
def update_state(self, y_true,y_pred, sample_weight = None):
true = tf.cast(y_true[...,0] == 1, tf.int64)
pred = tf.math.argmax(y_pred == 1 , axis=-1)
if sample_weight is not None:
true = true * (sample_weight[...])
pred = pred * (sample_weight[...])
A = tf.math.count_nonzero(true & pred) * 2
B = tf.math.count_nonzero(true) + tf.math.count_nonzero(pred)
value = tf.math.divide_no_nan(tf.cast(A, tf.float32),tf.cast(B, tf.float32))
self.dice.assign(value)
def result(self):
return self.dice
def reset_state(self):
self.dice.assign(0.0)
External Validation Dice
def dsc(y_true, y_pred, sample_weight=None, c = 1):
print(y_true.shape, y_pred.shape)
true = tf.cast(y_true[...,0] == 1, tf.int64)
pred = tf.math.argmax(y_pred== c , axis=-1)
print(true.shape,pred.shape)
if sample_weight is not None:
true = true * (sample_weight[...])
pred = pred * (sample_weight[...])
A = tf.math.count_nonzero(true & pred) * 2
B = tf.math.count_nonzero(true) + tf.math.count_nonzero(pred)
return A / B
上面的指标遇到了计算 NaN 的问题,或者如果网络没有在没有正类的切片上预测任何东西,则基本上为 0。下面重写的代码解决了这个问题:
def dice(self, y_true,y_pred, epsilon = p['epsilon']):
y_pred_arg = tf.math.argmax(y_pred, axis = -1)
y_true_f = tf.cast(K.flatten(y_true), tf.int64)
y_pred_f = tf.cast(K.flatten(y_pred_arg), tf.int64)
intersection = tf.cast(K.sum(y_true_f * y_pred_f), tf.float32)
dice = (2 * intersection + epsilon) / (tf.cast(K.sum(y_true_f), tf.float32) + tf.cast(K.sum(y_pred_f), tf.float32) + epsilon)
return tf.cast(dice, tf.float32)
epsilon 是一个平滑因子。这有助于防止除以 0 的情况。我个人发现 epsilon = 1e-2 在我当前的网络上有最好的结果,但这绝对是一个应该为训练优化的超参数。