Play a sound file only once during object detection in OpenCV

Problem description · Votes: 0 · Answers: 2

The program below is an Air Drums implementation using OpenCV. Blue objects are treated as drumsticks, and when they appear in a region of interest a sound is played. I want the sound to play only once.
As it stands, if I simply hold the drumstick inside the region of interest, the sound keeps looping (which is what the code is designed to do), but I would like to improve that.
I was wondering whether there is some way to play the sound only when the drumstick leaves the region of interest and comes back.
Any resources or help would be much appreciated. Here is the code:

import numpy as np
import time
import cv2
import pygame


def state_machine(sumation,sound):

    # Check if blue color object present in the ROI     
    yes = (sumation) > Hatt_thickness[0]*Hatt_thickness[1]*0.8

    # If present play the respective instrument.
    if yes and sound==1:
        drum_clap.play()
        
    elif yes and sound==2:
        drum_snare.play()

def ROI_analysis(frame,sound):
    

    # converting the image into HSV
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    # generating mask for blue color
    mask = cv2.inRange(hsv, blueLower, blueUpper)
    
    # Calculating the number of white pixels depicting the blue color pixels in the ROI
    sumation = np.sum(mask)
    
    # Function that decides to play the instrument or not.
    state_machine(sumation,sound)

    
    return mask


Verbose = False

# importing the audio files
pygame.init()
drum_clap = pygame.mixer.Sound('batterrm.wav')
drum_snare = pygame.mixer.Sound('button-2.ogg')


# HSV range for detecting blue color 
blueLower = (80,150,10)
blueUpper = (120,255,255)

# Frame acquisition from webcam / USB camera
camera = cv2.VideoCapture(0)
ret,frame = camera.read()
H,W = frame.shape[:2]

kernel = np.ones((7,7),np.uint8)

# reading the image of hatt and snare for augmentation.
Hatt = cv2.resize(cv2.imread('./Images/Hatt.png'),(200,100),interpolation=cv2.INTER_CUBIC)
Snare = cv2.resize(cv2.imread('./Images/Snare.png'),(200,100),interpolation=cv2.INTER_CUBIC)


# Setting the ROI area for blue color detection
Hatt_center = [np.shape(frame)[1]*2//8,np.shape(frame)[0]*6//8]
Snare_center = [np.shape(frame)[1]*6//8,np.shape(frame)[0]*6//8]
Hatt_thickness = [200,100]
Hatt_top = [Hatt_center[0]-Hatt_thickness[0]//2,Hatt_center[1]-Hatt_thickness[1]//2]
Hatt_btm = [Hatt_center[0]+Hatt_thickness[0]//2,Hatt_center[1]+Hatt_thickness[1]//2]

Snare_thickness = [200,100]
Snare_top = [Snare_center[0]-Snare_thickness[0]//2,Snare_center[1]-Snare_thickness[1]//2]
Snare_btm = [Snare_center[0]+Snare_thickness[0]//2,Snare_center[1]+Snare_thickness[1]//2]


time.sleep(1)

while True:
    
    # grab the current frame
    ret, frame = camera.read()
    if not ret:
        break
    # mirror the frame so on-screen motion matches the user's movement
    frame = cv2.flip(frame, 1)
    
    # Selecting ROI corresponding to snare
    snare_ROI = np.copy(frame[Snare_top[1]:Snare_btm[1],Snare_top[0]:Snare_btm[0]])
    mask = ROI_analysis(snare_ROI,1)

    # Selecting ROI corresponding to Hatt
    hatt_ROI = np.copy(frame[Hatt_top[1]:Hatt_btm[1],Hatt_top[0]:Hatt_btm[0]])
    mask = ROI_analysis(hatt_ROI,2)

    # Writing the title text on the frame.
    cv2.putText(frame,'Project: Air Drums',(10,30),2,1,(20,20,20),2)
    
    # Display the ROI to view the blue colour being detected
    if Verbose:
        # Displaying the ROI in the Image
        frame[Snare_top[1]:Snare_btm[1],Snare_top[0]:Snare_btm[0]] = cv2.bitwise_and(frame[Snare_top[1]:Snare_btm[1],Snare_top[0]:Snare_btm[0]],frame[Snare_top[1]:Snare_btm[1],Snare_top[0]:Snare_btm[0]], mask=mask[Snare_top[1]:Snare_btm[1],Snare_top[0]:Snare_btm[0]])
        frame[Hatt_top[1]:Hatt_btm[1],Hatt_top[0]:Hatt_btm[0]] = cv2.bitwise_and(frame[Hatt_top[1]:Hatt_btm[1],Hatt_top[0]:Hatt_btm[0]],frame[Hatt_top[1]:Hatt_btm[1],Hatt_top[0]:Hatt_btm[0]],mask=mask[Hatt_top[1]:Hatt_btm[1],Hatt_top[0]:Hatt_btm[0]])
    
    # Augmenting the instruments in the output frame.
    else:
        # Augmenting the image of the instruments on the frame.
        frame[Snare_top[1]:Snare_btm[1],Snare_top[0]:Snare_btm[0]] = cv2.addWeighted(Snare, 1, frame[Snare_top[1]:Snare_btm[1],Snare_top[0]:Snare_btm[0]], 1, 0)
        frame[Hatt_top[1]:Hatt_btm[1],Hatt_top[0]:Hatt_btm[0]] = cv2.addWeighted(Hatt, 1, frame[Hatt_top[1]:Hatt_btm[1],Hatt_top[0]:Hatt_btm[0]], 1, 0)
    
    
    cv2.imshow('Output',frame)
    key = cv2.waitKey(1) & 0xFF
    # if the 'q' key is pressed, stop the loop
    if key == ord("q"):
        break
 
# cleanup the camera and close any open windows
camera.release()
cv2.destroyAllWindows()
Tags: python, opencv, pygame, computer-vision
2 Answers
Score: 0

One way to solve this is to use a boolean that keeps track of whether the drumstick has already triggered the sound. If you want the sound to play only once when the stick enters a region, set this flag to True after the sound has played and reset it to False when the stick is removed from the region. Here is the same code with the flag added:

import numpy as np
import time
import cv2
import pygame

IS_PLAYING = False    # Tracks whether the sound has already been played during this "hover"

def state_machine(sumation,sound):

    global IS_PLAYING

    # Check if blue color object present in the ROI     
    yes = (sumation) > Hatt_thickness[0]*Hatt_thickness[1]*0.8
    if yes:
        if not IS_PLAYING:    # If sound has not been played

            # If present play the respective instrument.
            if yes and sound==1:
                drum_clap.play()
                
            elif yes and sound==2:
                drum_snare.play()

            IS_PLAYING = True  # Sound has been played, so we do not need to repeat

    else:    # Drumstick is not over the drum, so it is fine to play the sound again
        IS_PLAYING = False


def ROI_analysis(frame,sound):
    

    # converting the image into HSV
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    # generating mask for blue color
    mask = cv2.inRange(hsv, blueLower, blueUpper)
    
    # Calculating the number of white pixels depicting the blue color pixels in the ROI
    sumation = np.sum(mask)
    
    # Function that decides to play the instrument or not.
    state_machine(sumation,sound)

    
    return mask


Verbose = False

# importing the audio files
pygame.init()
drum_clap = pygame.mixer.Sound('batterrm.wav')
drum_snare = pygame.mixer.Sound('button-2.ogg')


# HSV range for detecting blue color 
blueLower = (80,150,10)
blueUpper = (120,255,255)

# Frame acquisition from webcam / USB camera
camera = cv2.VideoCapture(0)
ret,frame = camera.read()
H,W = frame.shape[:2]

kernel = np.ones((7,7),np.uint8)

# reading the image of hatt and snare for augmentation.
Hatt = cv2.resize(cv2.imread('./Images/Hatt.png'),(200,100),interpolation=cv2.INTER_CUBIC)
Snare = cv2.resize(cv2.imread('./Images/Snare.png'),(200,100),interpolation=cv2.INTER_CUBIC)


# Setting the ROI area for blue color detection
Hatt_center = [np.shape(frame)[1]*2//8,np.shape(frame)[0]*6//8]
Snare_center = [np.shape(frame)[1]*6//8,np.shape(frame)[0]*6//8]
Hatt_thickness = [200,100]
Hatt_top = [Hatt_center[0]-Hatt_thickness[0]//2,Hatt_center[1]-Hatt_thickness[1]//2]
Hatt_btm = [Hatt_center[0]+Hatt_thickness[0]//2,Hatt_center[1]+Hatt_thickness[1]//2]

Snare_thickness = [200,100]
Snare_top = [Snare_center[0]-Snare_thickness[0]//2,Snare_center[1]-Snare_thickness[1]//2]
Snare_btm = [Snare_center[0]+Snare_thickness[0]//2,Snare_center[1]+Snare_thickness[1]//2]


time.sleep(1)

while True:
    
    # grab the current frame
    ret, frame = camera.read()
    if not ret:
        break
    # mirror the frame so on-screen motion matches the user's movement
    frame = cv2.flip(frame, 1)
    
    # Selecting ROI corresponding to snare
    snare_ROI = np.copy(frame[Snare_top[1]:Snare_btm[1],Snare_top[0]:Snare_btm[0]])
    mask = ROI_analysis(snare_ROI,1)

    # Selecting ROI corresponding to Hatt
    hatt_ROI = np.copy(frame[Hatt_top[1]:Hatt_btm[1],Hatt_top[0]:Hatt_btm[0]])
    mask = ROI_analysis(hatt_ROI,2)

    # Writing the title text on the frame.
    cv2.putText(frame,'Project: Air Drums',(10,30),2,1,(20,20,20),2)
    
    # Display the ROI to view the blue colour being detected
    if Verbose:
        # Displaying the ROI in the Image
        frame[Snare_top[1]:Snare_btm[1],Snare_top[0]:Snare_btm[0]] = cv2.bitwise_and(frame[Snare_top[1]:Snare_btm[1],Snare_top[0]:Snare_btm[0]],frame[Snare_top[1]:Snare_btm[1],Snare_top[0]:Snare_btm[0]], mask=mask[Snare_top[1]:Snare_btm[1],Snare_top[0]:Snare_btm[0]])
        frame[Hatt_top[1]:Hatt_btm[1],Hatt_top[0]:Hatt_btm[0]] = cv2.bitwise_and(frame[Hatt_top[1]:Hatt_btm[1],Hatt_top[0]:Hatt_btm[0]],frame[Hatt_top[1]:Hatt_btm[1],Hatt_top[0]:Hatt_btm[0]],mask=mask[Hatt_top[1]:Hatt_btm[1],Hatt_top[0]:Hatt_btm[0]])
    
    # Augmenting the instruments in the output frame.
    else:
        # Augmenting the image of the instruments on the frame.
        frame[Snare_top[1]:Snare_btm[1],Snare_top[0]:Snare_btm[0]] = cv2.addWeighted(Snare, 1, frame[Snare_top[1]:Snare_btm[1],Snare_top[0]:Snare_btm[0]], 1, 0)
        frame[Hatt_top[1]:Hatt_btm[1],Hatt_top[0]:Hatt_btm[0]] = cv2.addWeighted(Hatt, 1, frame[Hatt_top[1]:Hatt_btm[1],Hatt_top[0]:Hatt_btm[0]], 1, 0)
    
    
    cv2.imshow('Output',frame)
    key = cv2.waitKey(1) & 0xFF
    # if the 'q' key is pressed, stop the loop
    if key == ord("q"):
        break
 
# cleanup the camera and close any open windows
camera.release()
cv2.destroyAllWindows()
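
At its core, this approach is rising-edge detection on the "object present" signal: the sound fires only on the transition from absent to present, not while the object stays in the region. Below is a minimal, self-contained sketch of that pattern on its own; the simulated frame sequence and the print call are placeholders for the real mask check and the Sound.play() call, not part of the original program.

was_present = False          # state remembered from the previous frame

def on_new_frame(present):
    """Trigger an action only on the absent -> present transition."""
    global was_present
    if present and not was_present:
        print("hit!")        # in the air-drum program this would be a Sound.play() call
    was_present = present

# Simulated detections: the stick enters, stays, leaves, then re-enters.
for present in [False, True, True, True, False, False, True]:
    on_new_frame(present)    # prints "hit!" exactly twice, once per entry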

Hope this answers your question! Feel free to let me know if you have any questions about this answer.


Score: -1

To make the sound play only once when the drumstick leaves the region of interest and comes back, you can use flags to track the state. Here is an updated version of the code:

import numpy as np
import time
import cv2
import pygame

def state_machine(sumation, sound, sound_playing):
    # Check if blue color object present in the ROI
    yes = (sumation) > Hatt_thickness[0]*Hatt_thickness[1]*0.8

    # If present and sound is not already playing, play the respective instrument.
    if yes and sound == 1 and not sound_playing[0]:
        drum_clap.play()
        sound_playing[0] = True
    elif yes and sound == 2 and not sound_playing[1]:
        drum_snare.play()
        sound_playing[1] = True
    # If not present, reset this sound's flag so it can be triggered again.
    elif not yes:
        sound_playing[sound - 1] = False

def ROI_analysis(frame, sound):
    # converting the image into HSV
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    # generating mask for blue color
    mask = cv2.inRange(hsv, blueLower, blueUpper)

    # Calculating the number of white pixels depicting the blue color pixels in the ROI
    sumation = np.sum(mask)

    # Function that decides to play the instrument or not.
    state_machine(sumation, sound, sound_playing)

    return mask

Verbose = False

# importing the audio files
pygame.init()
drum_clap = pygame.mixer.Sound('batterrm.wav')
drum_snare = pygame.mixer.Sound('button-2.ogg')

# HSV range for detecting blue color
blueLower = (80, 150, 10)
blueUpper = (120, 255, 255)

# Frame acquisition from webcam/ usbcamera
camera = cv2.VideoCapture(0)
ret, frame = camera.read()
H, W = frame.shape[:2]

kernel = np.ones((7, 7), np.uint8)

# reading the image of hatt and snare for augmentation.
Hatt = cv2.resize(cv2.imread('./Images/Hatt.png'), (200, 100), interpolation=cv2.INTER_CUBIC)
Snare = cv2.resize(cv2.imread('./Images/Snare.png'), (200, 100), interpolation=cv2.INTER_CUBIC)

# Setting the ROI area for blue color detection
Hatt_center = [np.shape(frame)[1] * 2 // 8, np.shape(frame)[0] * 6 // 8]
Snare_center = [np.shape(frame)[1] * 6 // 8, np.shape(frame)[0] * 6 // 8]
Hatt_thickness = [200, 100]
Hatt_top = [Hatt_center[0] - Hatt_thickness[0] // 2, Hatt_center[1] - Hatt_thickness[1] // 2]
Hatt_btm = [Hatt_center[0] + Hatt_thickness[0] // 2, Hatt_center[1] + Hatt_thickness[1] // 2]

Snare_thickness = [200, 100]
Snare_top = [Snare_center[0] - Snare_thickness[0] // 2, Snare_center[1] - Snare_thickness[1] // 2]
Snare_btm = [Snare_center[0] + Snare_thickness[0] // 2, Snare_center[1] + Snare_thickness[1] // 2]

time.sleep(1)

sound_playing = [False, False]  # Flag to keep track of sound playing state

while True:
    # grab the current frame
    ret, frame = camera.read()
    if not ret:
        break
    # mirror the frame so on-screen motion matches the user's movement
    frame = cv2.flip(frame, 1)

    # Selecting ROI corresponding to snare
    snare_ROI = np.copy(frame[Snare_top[1]:Snare_btm[1], Snare_top[0]:Snare_btm[0]])
    mask = ROI_analysis(snare_ROI, 1)

    # Selecting ROI corresponding to Hatt
    hatt_ROI = np.copy(frame[Hatt_top[1]:Hatt_btm[1], Hatt_top[0]:Hatt_btm[0]])
    mask = ROI_analysis(hatt_ROI, 2)

    # Writing the title text on the frame.
    cv2.putText(frame, 'Project: Air Drums', (10, 30), 2, 1, (20, 20, 20), 2)

    # Display the ROI to view the blue colour being detected
    if Verbose:
        # Displaying the ROI in the Image
        frame[Snare_top[1]:Snare_btm[1], Snare_top[0]:Snare_btm[0]] = cv2.bitwise_and(
            frame[Snare_top[1]:Snare_btm[1], Snare_top[0]:Snare_btm[0]],
            frame[Snare_top[1]:Snare_btm[1], Snare_top[0]:Snare_btm[0]], mask=mask[Snare_top[1]:Snare_btm[1], Snare_top[0]:Snare_btm[0]])
        frame[Hatt_top[1]:Hatt_btm[1], Hatt_top[0]:Hatt_btm[0]] = cv2.bitwise_and(
            frame[Hatt_top[1]:Hatt_btm[1], Hatt_top[0]:Hatt_btm[0]],
            frame[Hatt_top[1]:Hatt_btm[1], Hatt_top[0]:Hatt_btm[0]], mask=mask[Hatt_top[1]:Hatt_btm[1], Hatt_top[0]:Hatt_btm[0]])

    # Augmenting the instruments in the output frame.
    else:
        # Augmenting the image of the instruments on the frame.
        frame[Snare_top[1]:Snare_btm[1], Snare_top[0]:Snare_btm[0]] = cv2.addWeighted(Snare, 1,
                                                                                     frame[Snare_top[1]:Snare_btm[1],
                                                                                     Snare_top[0]:Snare_btm[0]], 1, 0)
        frame[Hatt_top[1]:Hatt_btm[1], Hatt_top[0]:Hatt_btm[0]] = cv2.addWeighted(Hatt, 1,
                                                                                   frame[Hatt_top[1]:Hatt_btm[1],
                                                                                   Hatt_top[0]:Hatt_btm[0]], 1, 0)

    cv2.imshow('Output', frame)
    key = cv2.waitKey(1) & 0xFF
    # if the 'q' key is pressed, stop the loop
    if key == ord("q"):
        break

# cleanup the camera and close any open windows
camera.release()
cv2.destroyAllWindows()

In this updated code, a sound_playing list is added to track the state of each sound (drum_clap and drum_snare). When the drumstick appears in a region of interest, the corresponding sound is played only if its flag is not already set to True. When the drumstick leaves the region, that sound's flag is reset to False so it can be triggered again.
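
One design note on the flags: state_machine can update sound_playing in place without a global statement because Python lists are mutable and the function receives a reference to the same object. A slightly more explicit variant, shown here as a sketch rather than as part of the answer above, keys the flags by the sound id instead of by list position; it assumes the same drum_clap, drum_snare, and Hatt_thickness globals defined in the code above.

# Hypothetical drop-in replacement for the flag handling above.
sound_playing = {1: False, 2: False}   # 1: snare ROI -> drum_clap, 2: hatt ROI -> drum_snare

def state_machine(sumation, sound, sound_playing):
    # True when enough blue pixels are present in this ROI
    present = sumation > Hatt_thickness[0] * Hatt_thickness[1] * 0.8
    if present and not sound_playing[sound]:
        (drum_clap if sound == 1 else drum_snare).play()
        sound_playing[sound] = True    # suppress retriggering while the stick stays
    elif not present:
        sound_playing[sound] = False   # stick left this ROI; allow the next hit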
