我一直在研究 keras-cv 代码库,有一些问题希望你能帮忙解答。我目前在做一个船舶检测模型,因此尝试使用 RetinaNet 模型。在把数据整理成 keras-cv 要求的输入格式之后,我对模型进行了训练。然而,模型的预测结果出现了问题。
问题:当我使用
model.predict(image_test)
时,返回的预测全部为 -1:
{'boxes': array([[[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.],
[-1., -1., -1., -1.]]], dtype=float32), 'confidence': array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0.]], dtype=float32), 'classes': array([[-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
-1., -1., -1., -1., -1., -1., -1., -1., -1.]], dtype=float32), 'num_detections': array([0], dtype=int32)}
当我尝试使用
model(image_test)
时,我得到:
predictions =
{'box': <tf.Tensor: shape=(1, 49104, 4), dtype=float32, numpy=
array([[[-0.09781634, 0.19049369, -0.04102969, -0.21385634],
[-0.2612432 , 0.17355151, -0.15664971, 0.14730531],
[-0.19195703, 0.3431507 , 0.14365467, 0.06342052],
...,
[ 0.00151845, 0.0044898 , -0.01494532, 0.00200025],
[ 0.01669142, -0.01205819, 0.01728174, 0.00686904],
[ 0.02757294, -0.00464585, -0.01185186, -0.00704384]]],
dtype=float32)>, 'classification': <tf.Tensor: shape=(1, 49104, 2), dtype=float32, numpy=
array([[[-4.491832 , -4.6013255],
[-4.2144737, -4.557256 ],
[-4.6533785, -4.6464787],
...,
[-4.5967946, -4.599116 ],
[-4.6118917, -4.59214 ],
[-4.5627737, -4.5775194]]], dtype=float32)>}
这是我使用的数据:
image_test =
tf.Tensor(
[[[[0.3839975 0.27996504 0.33198124]
[0.40149528 0.29109997 0.34629762]
[0.44508073 0.27996504 0.36252287]
...
[0.37477133 0.22842602 0.30159864]
[0.3979957 0.25674066 0.32736814]
[0.3760439 0.21379144 0.29491767]]
[[0.39481428 0.29269072 0.34375247]
[0.394178 0.30605265 0.35011533]
[0.40149528 0.30764335 0.35456935]
...
[0.42312893 0.26342162 0.34327528]
[0.43203694 0.25801322 0.34502506]
[0.40181342 0.22365387 0.31273365]]
[[0.41390282 0.30923405 0.36156845]
[0.37699834 0.31591508 0.34645668]
[0.3101885 0.2993717 0.3047801 ]
...
[0.44571704 0.2828283 0.36427265]
[0.4425356 0.25801322 0.35027438]
[0.41485724 0.24178797 0.3283226 ]]
...
[[0.3585461 0.27837428 0.31846023]
[0.29746282 0.2882367 0.29284978]
[0.29173627 0.28251016 0.2871232 ]
...
[0.29587212 0.29841724 0.29714468]
[0.31336996 0.2993717 0.3063708 ]
[0.36809036 0.30064425 0.3343673 ]]
[[0.36109126 0.29396328 0.32752725]
[0.32768634 0.29841724 0.31305176]
[0.3079615 0.27392033 0.2909409 ]
...
[0.3525014 0.3079615 0.33023146]
[0.34295717 0.30191684 0.322437 ]
[0.37063548 0.3025531 0.3365943 ]]
[[0.35568285 0.30191684 0.32879984]
[0.33182216 0.2936451 0.31273365]
[0.32705003 0.27551103 0.30128053]
...
[0.42853734 0.29269072 0.360614 ]
[0.40213156 0.28537342 0.34375247]
[0.3820886 0.2847371 0.33341286]]]], shape=(1, 512, 512, 3), dtype=float32)
我的模型使用具有以下模式的图像和标签进行训练:
<tf.Tensor: shape=(1, 512, 512, 3), dtype=float32, numpy=
array([[[[0.33106965, 0.24011646, 0.285593 ],
[0.32864422, 0.24666505, 0.2876546 ],
[0.31190884, 0.24545236, 0.2786806 ],
...,
[0.27674025, 0.22459374, 0.250667 ],
[0.2527286 , 0.21925785, 0.2359932 ],
[0.27334464, 0.2211982 , 0.2472714 ]],
[[0.31215134, 0.25709438, 0.28462285],
[0.31554696, 0.25782198, 0.28668448],
[0.30657288, 0.25054574, 0.27855933],
...,
[0.2791657 , 0.22895953, 0.2540626 ],
[0.2660684 , 0.24423967, 0.25515404],
[0.29008004, 0.23041478, 0.2602474 ]],
[[0.30196458, 0.25685182, 0.2794082 ],
[0.3087558 , 0.2532137 , 0.28098473],
[0.30705798, 0.24690762, 0.2769828 ],
...,
[0.26364297, 0.21974294, 0.24169298],
[0.23914629, 0.2226534 , 0.23089983],
[0.2786806 , 0.22071311, 0.24969682]],
...,
[[0.29759887, 0.21877277, 0.2581858 ],
[0.30341986, 0.19936942, 0.25139466],
[0.3060878 , 0.18869756, 0.2473927 ],
...,
[0.21101144, 0.23381034, 0.22241089],
[0.20227993, 0.2231385 , 0.2127092 ],
[0.21295173, 0.22507884, 0.21901529]],
[[0.32670388, 0.22895953, 0.27783167],
[0.32888675, 0.20422022, 0.2665535 ],
[0.3233083 , 0.19500364, 0.259156 ],
...,
[0.21683241, 0.23672086, 0.22677661],
[0.21343681, 0.22071311, 0.21707493],
[0.21780258, 0.226049 , 0.2219258 ]],
[[0.30633035, 0.21537717, 0.26085377],
[0.32112542, 0.21343681, 0.26728114],
[0.32476354, 0.21295173, 0.26885766],
...,
[0.2231385 , 0.24254183, 0.23284017],
[0.21489207, 0.23041478, 0.2226534 ],
[0.20470531, 0.23235507, 0.2185302 ]]]], dtype=float32)>, {'boxes': <tf.RaggedTensor [[[22.0, 285.0, 4.0, 6.0],
[203.0, 301.0, 4.0, 4.0],
[237.0, 146.0, 25.0, 20.0]]]>, 'classes':
主要代码:
import os.path
import datetime

# Workaround to silence TensorFlow's C++ log messages. The variable is set
# before TensorFlow (or any module that may import it) is loaded, because it
# must happen before importing tensorflow.
# NOTE(review): the original comment said only INFO messages should be hidden,
# but level '3' is understood to hide WARNING and ERROR as well ('1' hides
# INFO only) -- confirm which level is intended.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from GPU_manager.gpu_manager import GPU_manager  # noqa: E402
from Configs.configs import CFG  # noqa: E402
from Data_factory.data_generator import Data_generator  # noqa: E402
import pandas as pd  # noqa: E402
import tensorflow as tf  # noqa: E402
from tensorflow import keras  # noqa: E402
from tensorflow.keras import optimizers  # noqa: E402
import keras_cv  # noqa: E402
from sklearn.model_selection import train_test_split  # noqa: E402

# GLOBALS
MODEL = 'retinaNET'
NB_SAMPLES = CFG['Model']["dataset"]["NB_SAMPLES"]
BATCH_SIZE = CFG['Model']["train"]["BATCH_SIZE"]
EPOCHS = CFG['Model']["train"]["EPOCHS"]
SPLIT_PATH = os.path.join(
    CFG["Resource"]["Dataset"]["Xview_radar"]["ANNOTATIONS_DIR"],
    "validation_scaled.csv",
)
CHIP_DIR = CFG["Resource"]["Dataset"]["Xview_radar"]["CHIP_DIR"]
# Timestamp used to give each run its own checkpoint and log directory.
TIME = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
CHECKPOINT_PATH = os.path.join(
    CFG["Resource"]["Savings"]["SAVE_DIR"], MODEL, "checkpoint", "fit" + TIME, "cp.ckpt"
)
LOGS_DIR = os.path.join(
    CFG["Resource"]["Savings"]["SAVE_DIR"], MODEL, "logs", "fit" + TIME
)

# gpu configuration
gpu = GPU_manager()
gpu.setup_gpu_env()

# Dataset
# NOTE(review): the original paste lost its indentation; the extent of this
# `with` block (everything up to the "# Model" section) is inferred -- confirm.
with tf.device('/CPU:0'):
    # load the data
    patch_paths = Data_generator.parse_data_directory(CHIP_DIR)

    # 80% train; the remaining 20% is split evenly into validation and test.
    train, remain = train_test_split(patch_paths, train_size=0.8, random_state=0, shuffle=True)
    valid, test = train_test_split(remain, test_size=0.5, random_state=0, shuffle=True)

    # NOTE(review): `train_dataset` and `valid_dataset` are consumed by
    # `model.fit` below, but their definitions were commented out, which makes
    # the script fail with NameError. They are restored here using the calls
    # from the commented-out lines -- confirm `data_loader()` is the intended
    # way to obtain the datasets.
    train_generator = Data_generator(train, BATCH_SIZE, split='train', shuffle_on_epoch_end=True, shuffle_at_init=True)
    train_dataset = train_generator.data_loader()

    valid_generator = Data_generator(valid, BATCH_SIZE, split='valid', shuffle_on_epoch_end=True, shuffle_at_init=True)
    valid_dataset = valid_generator.data_loader()

    test_generator = Data_generator(test, batch_size=1, split='test', shuffle_on_epoch_end=True, shuffle_at_init=True)
    #test_dataset = test_generator.data_loader()

# Model
print("\nModel")
print(f"-- {MODEL}")
print(f"-- BATCH_SIZE : {BATCH_SIZE}")
print(f"-- EPOCHS : {EPOCHS}")

model = keras_cv.models.RetinaNet(
    num_classes=2,
    bounding_box_format="xywh",
    backbone=keras_cv.models.ResNet50Backbone.from_preset(
        "resnet50_imagenet"
    )
)

# Freeze the backbone so its weights are not updated during training.
model.backbone.trainable = False

# Set `rate` to 0.5 on every top-level layer that exposes such an attribute
# (presumably dropout layers -- TODO confirm).
for layer in model.layers:
    if hasattr(layer, 'rate'):
        layer.rate = 0.5

optimizer = tf.keras.optimizers.SGD(
    learning_rate=0.01, momentum=0.9, global_clipnorm=10.0)

# Train model
model.compile(
    classification_loss='focal',
    box_loss='huber',
    optimizer=optimizer,
    jit_compile=False
)

callbacks = [
    keras.callbacks.TensorBoard(log_dir=LOGS_DIR,
                                histogram_freq=1,
                                write_graph=True,
                                write_images=False,
                                update_freq='epoch',
                                profile_batch='1,2',
                                embeddings_freq=1),
    keras.callbacks.ModelCheckpoint(CHECKPOINT_PATH, save_weights_only=True)
]

print("\nTraining")
fit_history = model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=EPOCHS,
    callbacks=callbacks
)

print("-- history:")
print(pd.DataFrame(fit_history.history))
print(">> Done")
我正在使用的配置
cuda-nvcc 11.3.58
cudatoolkit 11.2.2
cudnn 8.1.0.77
tensorflow 2.11
keras-cv 0.4.2 (unreleased version, github version of the day)
我尝试过调整学习率、更改批量大小、打乱样本顺序、清理数据,但预测结果仍然一样。此外,指标 percent_matched_box_with_anchor 在每个 epoch 的取值都相同。我期望得到的是以像素坐标(0 到 512)表示的边界框,类别为 0 或 1。
-1
表示没有检测到目标,也就是说您的模型没有找到任何内容。之所以返回填充值而不是空结果,是为了让输出的维度始终保持一致,从而可以对数据进行批处理。
我没有发现代码有任何明显的错误,但我建议在 TensorBoard 中检查训练进度。模型在学习吗?如果是,它在训练数据上能正常工作吗?您要预测的图像中是否确实包含您想检测的目标?您能换一个骨干网络(backbone)试试吗?
如果模型仍然没有学习,您应该尝试调整锚框(anchor box)和解码器的设置;如果您的目标非常小或非常细长,这一点尤其重要。