我正在尝试制作一个 TensorFlow 人脸检测模型。一个文件训练模型并将模型导出到此处显示的 .h5 文件中。
import tensorflow as tf
import cv2
from tensorflow.keras import layers
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.resnet50 import preprocess_input
image_size = (224, 224)
batch_size = 32
epochs = 10
def load_widerface_dataset(widerface_dir):
images_dir = os.path.join(widerface_dir, 'WIDER_train', 'images')
labels_file = os.path.join(widerface_dir, 'wider_face_split', 'wider_face_train_bbx_gt.txt')
X = []
y = []
with open(labels_file, 'r') as f:
lines = f.readlines()
num_images = int(lines[1])
current_line = 2
for _ in range(num_images):
image_path = os.path.join(images_dir, lines[current_line - 2].strip())
num_faces = int(lines[num_images])
image = Image.open(image_path)
image = image.resize((224, 224))
X.append(np.array(image))
faces = []
for i in range(num_faces):
face_line = lines[current_line + i].strip().split(' ')
face = [int(coord) for coord in face_line[:4]]
faces.append(face)
y.append(np.array(faces))
current_line += num_faces
if current_line < len(lines) and not lines[current_line].strip():
current_line += 1
X = np.array(X)
y = np.array(y)
return X, y
def build_model():
model = tf.keras.models.Sequential([
tf.keras.layers.Conv2D(32, (3, 3), activation='relu',
input_shape=(image_size[0], image_size[1], 3)),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(64, activation='relu'),
tf.keras.layers.Dense(4, activation='sigmoid')
])
return model
widerface_dir = 'C:/face_training/WIDER_train'
X_train, y_train = load_widerface_dataset(widerface_dir)
model = build_model()
model.compile(optimizer = 'adam', loss = 'mean_squared_error')
model.fit(X_train, y_train, batch_size = batch_size, epochs = epochs)
model.save('face_detection_model.h5')
另一个文件是拍摄视频并使用我在这里训练的模型分析视频并提取检测到面部的帧。这是它的代码。
import cv2
import os
from tensorflow.keras.models import load_model
import sys
import numpy as np
vidPath = "testclip2.mp4"
model_path = 'face_detection_model.h5'
model = load_model(model_path)
test_tensorflow = 'C:/Users/user/Documents/PyCharmProjects/test_tensorflow'
if not os.path.exists(test_tensorflow):
os.makedirs(test_tensorflow)
cap = cv2.VideoCapture(vidPath)
currentFrame = 0
while (cap.isOpened()):
ret, frame = cap.read()
if ret == True:
frame = cv2.resize(frame, (224, 224))
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = frame / 255.0
frames = [frame]
for frame in frames:
prediction = model.predict(frame.reshape(1, 224, 224, 3))
if prediction.any() > 0.5:
cv2.imwrite(os.path.join(test_tensorflow, str(currentFrame) + '.jpg'), frame)
currentFrame += 1
else:
break
cap.release()
sys.exit(0)
现在我遇到了这个问题,程序只提取黑色图像。我不确定出了什么问题,我尝试阅读了很多东西,但似乎没有任何效果。非常感谢您的指导。
我认为问题在于您正在规范化 RGB 值,即从 0-255 的范围转换为 0-1 的范围。
这是建模和预测的常见做法,但是当您尝试使用 OpenCv 输出结果时,它不会理解归一化像素值。
您可以尝试此处建议的快速修复:https://stackoverflow.com/a/54165573/334402
这基本上只是恢复了 OpenCV 期望的原始 0-255 范围。