我想训练ET-Track(一个不错的视频对象跟踪器)。它基于 Ocean(另一个视频对象跟踪器)。
问题是他们之前没有分享任何用于训练它的代码。现在我们有了代码(在 Ocean 代码的基础上修改),但遇到了一个大问题:当我们为模板/搜索区域裁剪图像时,裁剪结果会出现随机的偏移。下面是一个例子:
你可以看到,模型将用来训练的 search2 图像存在一个偏移。我无法弄清楚为什么会出现这个偏移。你能帮我吗?
# Visualization of one dataset sample: draw the ground-truth box on the
# search crop and overlay the score-map-sized regression-weight and
# classification-label grids on top of it.
(template, search, out_label, reg_label, reg_weight, bbox) = dataset[0]
x1, y1, x2, y2 = map(int, bbox)
# CHW float -> HWC uint8. ascontiguousarray is required because
# cv2.rectangle rejects the non-contiguous view produced by transpose().
search = np.ascontiguousarray(search.transpose((1, 2, 0)).astype(np.uint8))
search = cv2.rectangle(search, (x1, y1), (x2, y2), (200, 100, 150), 1)
reg_weight = cv2.cvtColor(reg_weight.astype(np.uint8), cv2.COLOR_GRAY2RGB)
# NOTE(review): this naive resize is a likely source of the observed shift.
# Grid cell (i, j) corresponds to search pixel (stride * i + offset), i.e.
# the 25x25 grid covers only the central region of the crop, not the full
# image — stretching it to the full search size misaligns the overlay.
reg_weight = cv2.resize(reg_weight, (search.shape[1], search.shape[0]))
out_label = cv2.cvtColor(out_label.astype(np.uint8) * 255, cv2.COLOR_GRAY2RGB)
out_label = cv2.resize(out_label, (search.shape[1], search.shape[0]))
# reg_weight is a 0/1 mask, so the product blanks out everything outside
# the positive-sample region before the box is drawn again.
search2 = cv2.rectangle(search * reg_weight, (x1, y1), (x2, y2), (200, 100, 150))
cv2.imshow("search2", search2)
cv2.imshow("search", search)
cv2.imshow("out_label", out_label)
cv2.waitKey(0)  # without this the HighGUI windows never actually render
这就是这些输出的创建方式。
这是我的数据集
__getitem__
:
if self.random_data:
template, search = self._get_pairs(index)
#choose 2 random image for search and template
template_image = cv2.imread(template[0].as_posix())
search_image = cv2.imread(search[0].as_posix())
# change bboxes format and pick the first one
template_target_bbox = self.yolo2ocean(template[1], template_image)
search_target_bbox = self.yolo2ocean(search[1], search_image)
_, template_image = crop_like_SiamFC(
template_image,
bbox=template_target_bbox,
exemplar_size=self.template_size,
instance_size=self.search_size,
)
_, search_image = crop_like_SiamFC(
search_image,
bbox=search_target_bbox,
exemplar_size=self.template_size,
instance_size=self.search_size + self.search_margin,
)
template_box = self._toBBox(template_image, template_target_bbox)
search_box = self._toBBox(search_image, search_target_bbox)
template, _, _ = self._augmentation(
template_image, template_box, self.template_size
)
search, bbox, dag_param = self._augmentation(
search_image, search_box, self.search_size, search=True
)
#No augment i have turned off all of them!
# from PIL image to numpy
template = np.array(template)
search = np.array(search)
out_label = self._dynamic_label([self.size, self.size], dag_param.shift)
reg_label, reg_weight = self.reg_label(bbox)
我认为
self.reg_label
是实际问题,但我不知道为什么?
这是函数:
def reg_label(self, bbox):
    """
    Build the FCOS-style regression target for every score-map location.

    :param bbox: ground-truth box [x1, y1, x2, y2] in search-image pixels
    :return: (reg_label, inds_nonzero) — reg_label is an [H, W, 4] array of
             (l, t, r, b) distances from each grid point to the four box
             edges; inds_nonzero is a float [H, W] mask that is 1.0 only
             where the grid point lies strictly inside the box.
    """
    x1, y1, x2, y2 = bbox
    # Distances from every pre-computed grid coordinate (see grids()) to
    # the four edges of the ground-truth box.
    left = self.grid_to_search_x - x1
    top = self.grid_to_search_y - y1
    right = x2 - self.grid_to_search_x
    bottom = y2 - self.grid_to_search_y
    # Stack into [H, W, 4]; a location is a positive sample only when all
    # four distances are positive, i.e. it falls inside the box.
    reg_label = np.stack((left, top, right, bottom), axis=-1)
    inds_nonzero = (reg_label.min(axis=-1) > 0).astype(float)
    return reg_label, inds_nonzero
def grids(self):
    """
    Pre-compute the search-image pixel position of every score-map cell.

    Fills self.grid_to_search_x / self.grid_to_search_y with [H, W] float
    arrays mapping each feature-map location to its centre pixel on the
    (self.search_size)-sized search crop, using a fixed stride of 8.
    """
    sz = self.size  # score-map side length, e.g. 25
    half = sz // 2
    # Signed offset of each cell from the map centre: [-half, ..., +half],
    # as floats (np.floor of the int centre keeps the original dtype).
    offsets = np.arange(0, sz) - np.floor(float(half))
    x, y = np.meshgrid(offsets, offsets)
    # Kept for compatibility with the original code; populated elsewhere, if at all.
    self.grid_to_search = {}
    self.stride = 8
    centre = self.search_size // 2
    # Project feature-map cells onto search-image pixel coordinates.
    self.grid_to_search_x = x * self.stride + centre
    self.grid_to_search_y = y * self.stride + centre
任何想法都会有帮助。非常感谢
干得好,雷扎!如果您能在 GitHub 或原始仓库中分享完整的代码,那就太好了。关于你的问题:可能是你的 stride 和 grid_to_search_y 设置错了,尝试调整它们,看看结果是否正常。祝你好运,伙计!在提问之前最好多思考一下——你的大脑是你作为开发人员或数据科学家的力量 :)