我正在尝试用视频数据构建 CNN-LSTM 模型。特征数据 frames_data 的形状为 (150120, 128, 128, 3),标签数据 labels_data 的形状为 (150120, 19)。我应该如何重塑它们,才能用作 CNN-LSTM 模型的输入?谢谢!以下是提取帧的代码:
# Class folders under the dataset root. os.listdir() returns entries in
# arbitrary order, so sort them: a class's integer label is its index in this
# list and must be identical on every run and machine. Non-directory entries
# are skipped because every class name is later listed as a folder.
class_names = sorted(
    entry
    for entry in os.listdir(root_folder_path)
    if os.path.isdir(os.path.join(root_folder_path, entry))
)
# Accumulators filled by extract_frames(): one entry per extracted frame
frames_data = []
labels_data = []
def extract_frames(video_path, label, num_frames=10, frame_size=(128, 128)):
    """Sample frames from one video and append them to the global lists.

    ``num_frames`` frame positions are picked evenly between the first and
    last frame. Each picked frame is resized to ``frame_size``, divided by
    255.0, and appended to ``frames_data``; ``label`` is appended to
    ``labels_data`` once per stored frame, so both lists stay aligned.

    Args:
        video_path: Path of the video file opened with ``cv2.VideoCapture``.
        label: Value stored in ``labels_data`` for every frame of this video.
        num_frames: Number of evenly spaced frame positions to sample.
        frame_size: Target size passed to ``cv2.resize``.

    Note:
        Fewer than ``num_frames`` frames are stored when the video has fewer
        frames than requested (duplicate positions are stored only once) or
        when reading fails before the last sampled position.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Builtin int replaces the np.int alias, which NumPy 1.24 removed.
        frame_indices = np.linspace(0, frame_count - 1, num_frames, dtype=int)
        # A set gives O(1) membership tests and collapses duplicate positions.
        wanted = set(frame_indices.tolist())
        last_wanted = max(wanted, default=-1)
        frame_num = 0
        # Stop after the last wanted frame rather than decoding the whole file.
        while frame_num <= last_wanted:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_num in wanted:
                # Resize the frame to the desired size
                resized_frame = cv2.resize(frame, frame_size)
                # Scale pixel values by 1/255
                normalized_frame = resized_frame / 255.0
                frames_data.append(normalized_frame)
                labels_data.append(label)
            frame_num += 1
    finally:
        # Release the capture even if decoding or resizing raises.
        cap.release()
# Frame-extraction settings shared by every video
num_frames_per_video = 10
frame_size = (128, 128)

# Visit each class folder; a folder's position in class_names is its label
for i, class_name in enumerate(class_names):
    class_folder_path = os.path.join(root_folder_path, class_name)
    video_files = [
        entry for entry in os.listdir(class_folder_path) if entry.endswith('.mp4')
    ]
    for video_file in video_files:
        video_path = os.path.join(class_folder_path, video_file)
        extract_frames(
            video_path,
            i,
            num_frames=num_frames_per_video,
            frame_size=frame_size,
        )

# Turn the accumulated lists into numpy arrays. Each row corresponds to one
# extracted frame (not one video), because extract_frames appends per frame.
frames_data = np.asarray(frames_data)
labels_data = np.asarray(labels_data)
我尝试重塑数据,但出现了以下错误:
ValueError: Input 0 of layer "max_pooling2d_16" is incompatible with the layer: expected ndim=4, found ndim=5. Full shape received: (None, 10, 128, 128, 16)