我在 GitHub 上找到了以下代码(链接)。它可以让程序检测并放大视频中极其微小的动作。
我想测试一下这段代码,看看它的工作情况,所以我就把我在链接上找到的下面的代码在Pycharm中试了一下。
import cv2
import numpy as np
import scipy.signal as signal
import scipy.fftpack as fftpack
# convert RGB to YIQ
def rgb2ntsc(src):
    """Convert a 3-channel image to the NTSC YIQ colour space.

    Args:
        src: Array of shape (rows, cols, 3). The luma coefficients are
            ordered 0.114, 0.587, 0.298, which matches a B, G, R channel
            layout (presumably OpenCV-decoded frames -- confirm with caller).

    Returns:
        A float64 array of shape (rows, cols, 3) with the transformed pixels.

    Raises:
        ValueError: If ``src`` is not a (rows, cols, 3) array.
    """
    T = np.array([[0.114, 0.587, 0.298], [-0.321, -0.275, 0.596], [0.311, -0.528, 0.212]])
    pixels = np.asarray(src, dtype=np.float64)
    if pixels.ndim != 3 or pixels.shape[2] != 3:
        raise ValueError("expected an image of shape (rows, cols, 3), got {}".format(pixels.shape))
    # One matrix product over the channel axis replaces the per-pixel Python
    # loop: out[i, j] == T.dot(src[i, j]) for every pixel.
    return pixels @ T.T
# convert YIQ to RGB
def ntsc2rbg(src):
    """Convert a 3-channel YIQ image back to the colour layout used by rgb2ntsc.

    Args:
        src: Array of shape (rows, cols, 3) holding Y, I, Q per pixel.

    Returns:
        A float64 array of shape (rows, cols, 3) with the transformed pixels.
        Values are not clipped or rounded.

    Raises:
        ValueError: If ``src`` is not a (rows, cols, 3) array.
    """
    T = np.array([[1, -1.108, 1.705], [1, -0.272, -0.647], [1, 0.956, 0.620]])
    pixels = np.asarray(src, dtype=np.float64)
    if pixels.ndim != 3 or pixels.shape[2] != 3:
        raise ValueError("expected an image of shape (rows, cols, 3), got {}".format(pixels.shape))
    # One matrix product over the channel axis replaces the per-pixel Python
    # loop: out[i, j] == T.dot(src[i, j]) for every pixel.
    return pixels @ T.T
#Build Gaussian Pyramid
def build_gaussian_pyramid(src, level=3):
    """Return the Gaussian pyramid of an image as a list, finest level first.

    The first entry is a copy of ``src``; each following entry is the result
    of ``cv2.pyrDown`` applied to the previous one, so the list holds
    ``level + 1`` images.
    """
    pyramid = [src.copy()]
    for _ in range(level):
        pyramid.append(cv2.pyrDown(pyramid[-1]))
    return pyramid
#Build Laplacian Pyramid
def build_laplacian_pyramid(src, levels=3):
    """Return the band-pass (Laplacian) levels of an image, coarsest first.

    Each entry is a Gaussian level minus the upsampled next-coarser level.
    The list has ``levels`` entries; the coarsest Gaussian residual itself is
    not included.

    Args:
        src: Image array accepted by ``cv2.pyrDown``.
        levels: Number of band-pass levels to produce.

    Returns:
        List of ``levels`` arrays, ordered from coarsest to finest.
    """
    gaussian_pyramid = build_gaussian_pyramid(src, levels)
    pyramid = []
    for i in range(levels, 0, -1):
        finer = gaussian_pyramid[i - 1]
        height, width = finer.shape[:2]
        # pyrDown rounds odd sizes up, so a plain pyrUp would be one pixel
        # larger than the finer level; request the exact size instead.
        expanded = cv2.pyrUp(gaussian_pyramid[i], dstsize=(width, height))
        pyramid.append(cv2.subtract(finer, expanded))
    return pyramid
#load video from file
def load_video(video_filename):
    """Decode a whole video file into a float array.

    Args:
        video_filename: Path passed to ``cv2.VideoCapture``.

    Returns:
        Tuple ``(video_tensor, fps)`` where ``video_tensor`` has shape
        (frames, height, width, 3) and holds only the frames that were
        actually decoded, and ``fps`` is the frame rate truncated to int.

    Raises:
        IOError: If the file cannot be opened.
        ValueError: If the container reports a non-positive frame count or
            frame size, which would otherwise lead to a nonsensical allocation.
    """
    cap = cv2.VideoCapture(video_filename)
    try:
        if not cap.isOpened():
            raise IOError("Unable to open video file: {!r}".format(video_filename))
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        print(frame_count, height, width)
        if frame_count <= 0 or width <= 0 or height <= 0:
            raise ValueError(
                "Invalid video metadata for {!r}: frames={}, height={}, width={}".format(
                    video_filename, frame_count, height, width))
        # NOTE: float64 storage needs frames * height * width * 3 * 8 bytes.
        video_tensor = np.zeros((frame_count, height, width, 3), dtype='float')
        x = 0
        # The reported frame count is only an estimate: never write past the
        # buffer, and stop early if the decoder runs out of frames.
        while x < frame_count:
            ret, frame = cap.read()
            if not ret:
                break
            video_tensor[x] = frame
            x += 1
    finally:
        # Release the decoder even when reading fails part-way.
        cap.release()
    # Drop trailing frames that were allocated but never decoded.
    return video_tensor[:x], fps
# apply temporal ideal bandpass filter to gaussian video
def temporal_ideal_filter(tensor, low, high, fps, axis=0):
    """Apply an ideal (brick-wall) temporal band-pass filter along ``axis``.

    The pass band runs from the FFT bin nearest ``low`` to the bin nearest
    ``high`` (both inclusive) and is applied symmetrically to positive and
    negative frequencies, so a real input gives a real output.

    Args:
        tensor: Array whose ``axis`` dimension is time.
        low: Lower cut-off frequency in Hz.
        high: Upper cut-off frequency in Hz.
        fps: Sampling rate (frames per second) along ``axis``.
        axis: Axis holding the time samples.

    Returns:
        Real array with the same shape as ``tensor`` containing the signed
        band-passed signal.
    """
    tensor = np.asarray(tensor)
    n = tensor.shape[axis]
    fft = fftpack.fft(tensor, axis=axis)
    frequencies = fftpack.fftfreq(n, d=1.0 / fps)
    # Search only the non-negative half so the bounds are bin magnitudes.
    positive = np.abs(frequencies[:n // 2 + 1])
    bound_low = (np.abs(positive - low)).argmin()
    bound_high = (np.abs(positive - high)).argmin()
    # |k| for every FFT bin: bins k and n - k hold +/- the same frequency.
    bins = np.arange(n)
    bin_magnitude = np.minimum(bins, n - bins)
    # A mask avoids the negative-index slicing pitfall (``fft[-0:]`` selects
    # everything) and keeps the spectrum conjugate-symmetric.
    keep = (bin_magnitude >= bound_low) & (bin_magnitude <= bound_high)
    mask_shape = [1] * tensor.ndim
    mask_shape[axis] = n
    fft *= keep.reshape(mask_shape)
    # Take the real part rather than the magnitude so that negative swings of
    # the band-passed signal are preserved instead of being rectified.
    return fftpack.ifft(fft, axis=axis).real
# build gaussian pyramid for video
def gaussian_video(video_tensor, levels=3):
    """Collect the coarsest Gaussian-pyramid level of every frame.

    The output array is allocated on the first frame, once the downsampled
    frame size is known, with one slot per input frame and 3 channels.
    """
    frame_total = video_tensor.shape[0]
    for index in range(frame_total):
        coarsest = build_gaussian_pyramid(video_tensor[index], level=levels)[-1]
        if index == 0:
            small_rows, small_cols = coarsest.shape[0], coarsest.shape[1]
            vid_data = np.zeros((frame_total, small_rows, small_cols, 3))
        vid_data[index] = coarsest
    return vid_data
#amplify the video
def amplify_video(gaussian_vid, amplification=50):
    """Return the input video multiplied by the amplification factor."""
    amplified = gaussian_vid * amplification
    return amplified
# reconstruct video from original video and gaussian video
def reconstract_video(amp_video, origin_video, levels=3):
    """Upsample the amplified low-resolution video and add it to the original.

    Args:
        amp_video: Array of shape (frames, h, w, 3) at the coarsest pyramid
            resolution, i.e. ``origin_video`` frames after ``levels`` calls
            to ``cv2.pyrDown``.
        origin_video: Array of shape (frames, height, width, 3).
        levels: Number of ``cv2.pyrUp`` steps needed to get back to the
            original resolution.

    Returns:
        Array with the shape of ``origin_video`` holding the sum of each
        original frame and its upsampled amplified counterpart.
    """
    final_video = np.zeros(origin_video.shape)
    height, width = origin_video.shape[1:3]
    # Size of every pyramid level from finest to coarsest. pyrDown rounds odd
    # sizes up, so a blind pyrUp (exactly 2x) would overshoot whenever a
    # dimension is not divisible by 2**levels.
    sizes = []
    for _ in range(levels):
        sizes.append((width, height))
        width, height = (width + 1) // 2, (height + 1) // 2
    for i in range(amp_video.shape[0]):
        img = amp_video[i]
        for dstsize in reversed(sizes):
            img = cv2.pyrUp(img, dstsize=dstsize)
        final_video[i] = img + origin_video[i]
    return final_video
#save video to files
def save_video(video_tensor, filename="out.avi", fps=30):
    """Write a (frames, height, width, 3) array to an MJPG-encoded video file.

    Args:
        video_tensor: Array of frames; values are clipped to [0, 255] and
            converted to 8-bit before writing.
        filename: Output path. Defaults to ``"out.avi"``.
        fps: Frame rate stored in the output file. Defaults to 30.

    Raises:
        IOError: If the output file cannot be opened for writing.
    """
    fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
    height, width = video_tensor.shape[1:3]
    writer = cv2.VideoWriter(filename, fourcc, fps, (width, height), 1)
    try:
        if not writer.isOpened():
            raise IOError("Unable to open video file for writing: {!r}".format(filename))
        for i in range(video_tensor.shape[0]):
            # convertScaleAbs takes the absolute value, which would turn
            # negative (over-amplified) pixels into bright ones; clip first
            # so they saturate at black instead.
            writer.write(cv2.convertScaleAbs(np.clip(video_tensor[i], 0, 255)))
    finally:
        # Finalize the file even if a frame fails to encode.
        writer.release()
#magnify color
def magnify_color(video_name, low, high, levels=3, amplification=20):
    """Run the colour-magnification pipeline and save the result.

    Steps: load the video, keep the coarsest Gaussian level of every frame,
    band-pass it in time between ``low`` and ``high`` Hz, amplify it, add it
    back to the original frames and write the output via ``save_video``.

    Args:
        video_name: Path of the input video.
        low: Lower temporal cut-off frequency in Hz.
        high: Upper temporal cut-off frequency in Hz.
        levels: Number of Gaussian pyramid levels.
        amplification: Gain applied to the filtered signal.
    """
    t, f = load_video(video_name)
    gau_video = gaussian_video(t, levels=levels)
    filtered_tensor = temporal_ideal_filter(gau_video, low, high, f)
    amplified_video = amplify_video(filtered_tensor, amplification=amplification)
    # Reconstruct with the same depth that was used to build the pyramid;
    # a hard-coded 3 here broke every call with levels != 3.
    final = reconstract_video(amplified_video, t, levels=levels)
    save_video(final)
#build laplacian pyramid for video
def laplacian_video(video_tensor, levels=3):
    """Build one video tensor per Laplacian-pyramid level.

    Returns a list of ``levels`` arrays. Entry ``n`` has one slot per input
    frame and holds level ``n`` of each frame's Laplacian pyramid, in the
    order produced by ``build_laplacian_pyramid``. The arrays are allocated
    on the first frame, once the per-level sizes are known.
    """
    frame_total = video_tensor.shape[0]
    tensor_list = []
    for index in range(frame_total):
        pyr = build_laplacian_pyramid(video_tensor[index], levels=levels)
        if index == 0:
            tensor_list.extend(
                np.zeros((frame_total, pyr[k].shape[0], pyr[k].shape[1], 3))
                for k in range(levels)
            )
        for n in range(levels):
            tensor_list[n][index] = pyr[n]
    return tensor_list
#butterworth bandpass filter
def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass ``data`` along axis 0 with a Butterworth filter.

    The cut-offs (same unit as ``fs``) are normalised by the Nyquist
    frequency ``fs / 2`` before designing the filter, which is then applied
    once, forward only, with ``scipy.signal.lfilter``.
    """
    nyquist = 0.5 * fs
    band = [lowcut / nyquist, highcut / nyquist]
    numerator, denominator = signal.butter(order, band, btype='band')
    return signal.lfilter(numerator, denominator, data, axis=0)
# reconstruct video from laplacian pyramid
def reconstract_from_tensorlist(filter_tensor_list, levels=3):
    """Collapse per-level video tensors back into one full-resolution video.

    Args:
        filter_tensor_list: List of arrays ordered coarsest to finest; entry
            ``n`` holds pyramid level ``n`` of every frame.
        levels: Number of entries of ``filter_tensor_list`` to combine.

    Returns:
        Array with the shape of the last (finest) tensor in the list. Each
        frame is the coarsest level repeatedly upsampled and summed with the
        next finer level.
    """
    final = np.zeros(filter_tensor_list[-1].shape)
    for i in range(filter_tensor_list[0].shape[0]):
        up = filter_tensor_list[0][i]
        for n in range(levels - 1):
            finer = filter_tensor_list[n + 1][i]
            height, width = finer.shape[:2]
            # Upsample to the exact size of the next level; a plain pyrUp is
            # one pixel too large when that level has an odd dimension.
            up = cv2.pyrUp(up, dstsize=(width, height)) + finer
        final[i] = up
    return final
# magnify motion
def magnify_motion(video_name, low, high, levels=3, amplification=20):
    """Run the motion-magnification pipeline and save the result.

    Steps: load the video, build a Laplacian pyramid per frame, band-pass
    every pyramid level in time with a Butterworth filter, amplify, collapse
    the pyramid, add it to the original frames and write the output via
    ``save_video``.

    Args:
        video_name: Path of the input video.
        low: Lower temporal cut-off frequency in Hz.
        high: Upper temporal cut-off frequency in Hz.
        levels: Number of Laplacian pyramid levels.
        amplification: Gain applied to the filtered signal.
    """
    t, f = load_video(video_name)
    lap_video_list = laplacian_video(t, levels=levels)
    filter_tensor_list = []
    for i in range(levels):
        filter_tensor = butter_bandpass_filter(lap_video_list[i], low, high, f)
        filter_tensor *= amplification
        filter_tensor_list.append(filter_tensor)
    # Forward the pyramid depth; relying on the callee's default of 3 broke
    # every call with levels != 3.
    recon = reconstract_from_tensorlist(filter_tensor_list, levels=levels)
    final = t + recon
    save_video(final)
if __name__ == "__main__":
    # Script entry point: motion magnification on the sample clip. The
    # colour variant is kept below for reference.
    # magnify_color("baby.mp4",0.4,3)
    magnify_motion("baby.mp4", low=0.4, high=3)
视频文件非常小 (baby.mp4 - 1797 KB 和 guitar.mp4 - 1171 KB) 但是当我运行baby.mp4文件时,我得到了以下错误。
video_tensor=np.zeros((frame_count,height,width,3),dtype='float')
ValueError: array is too big; `arr.size * arr.dtype.itemsize` is larger than the maximum possible size.
当我运行 guitar.mp4 时,我得到了以下错误。
tensor_list.append(np.zeros((video_tensor.shape[0],pyr[k].shape[0],pyr[k].shape[1],3)))
MemoryError: Unable to allocate 142. MiB for an array with shape (300, 96, 216, 3) and data type float64
看来主要问题出在我的电脑上。但我有16GB的内存,我检查了我有超过8GB的免费使用。然后我试着运行程序,同时检查我的内存,但我的内存没有任何变化。所以我很困惑,为什么会出现内存错误。
后来我改在 Anaconda 中运行这段代码,它就完全正常工作了。我怀疑之前在 PyCharm 中使用的是一个内存受限的虚拟环境,所以才出现了内存错误。