Python 3 + OpenCV + Mediapipe。多目标跟踪

问题描述 投票:0回答:1

我还不太理解所有 Python 方法(事实上……还很遥远)。

我正在使用网络摄像头来跟踪一个人。效果很好。

但是,它会随机选择视图中的某个人并跟踪他们,有时会跳到另一个人,我正在努力解决这个问题。

我不确定它如何决定跟踪谁,但我的想法是识别单个目标(带有边界框是理想的),然后可能要求它跟踪该边界框内的骨架。

我这里的代码基本上来自于谷歌搜索的代码和修补。难免会有错误!但它确实可以运行并工作。

寻找了很长时间的工作示例来尝试复制,但没有成功。

import cv2
import mediapipe as mp                   # Controls the image/video processing
import serial                            # Serial for comms to the Arduino
import time                              # For delays etc
import platform                          # Details of the system we are running
import atexit                            # Clean method of exiting
import serial.tools.list_ports           # Serial port information
import sys
import numpy as np                       # Processes numbers, strings and arrays
import keyboard                          # Allows use of the keyboard
#import pickle                           # Allows you to store data

print(platform.system(), platform.release())                                    # Log the host OS name and release
print("Python version " + platform.python_version())
print("")

mp_drawing = mp.solutions.drawing_utils                                         # Mediapipe helpers for drawing landmarks/connections
mp_pose = mp.solutions.pose                                                     # NOTE(review): mp.solutions.pose tracks at most ONE person per
                                                                                # frame - the root cause of the "jumps between people" issue

#mp_drawing_styles = mp.solutions.drawing_styles
#mp_holistic = mp.solutions.holistic                                             # Combines hands, pose and face meshes
#mp_face_mesh = mp.solutions.face_mesh                                           # Much more detailed face tracking

pose = mp_pose.Pose(
    min_detection_confidence=0.7,                                               # If detection confidence is >0.7, start tracking
    min_tracking_confidence=0.7)                                                # If below min tracking confidence, fall back to detection
    #static_image_mode = False                                                  # Whether to treat inputs as static images or a video stream - Default False
    #model_complexity = 1                                                       # Complexity of the pose landmark model: 0, 1 or 2. Default = 1
    #smooth_landmarks = True                                                    # Reduces jitter in the prediction. Default is True

nvt_flag = False                                                                # "No valid target" latch: True while tracking is lost
bowflag = False                                                                 # Gesture flags - presumably set elsewhere; unused in this excerpt
curtseyflag = False
showvideo = True                                                                # True = live video overlay; False = skeleton on black background
lux = 0
heartbeat = 0                                                                   # Frame counter driving the periodic serial heartbeat below

pip_x_offset=50                                                                 # Position of the PIP
pip_y_offset=50

font = cv2.FONT_HERSHEY_SIMPLEX                                                 # Font used for all on-screen text

#---------------------  Arduino communications -----------------------

def findArduinoUnoPort():
    """Scan the serial ports and return the device name (e.g. 'COM3') of the
    first Arduino Uno found, or None when no Uno is connected.

    The Uno is identified by its USB vendor/product id (VID:PID=2341:0043).
    pyserial reports that string only in the port's hardware id, so we check
    ``port.hwid`` - the original also tested the device name and description,
    which never contain a VID:PID.
    """
    for port in serial.tools.list_ports.comports():
        if "VID:PID=2341:0043" in port.hwid:
            print(port)                                                     # Log the matching port for diagnostics
            return port.device
    return None                                                             # Explicit: caller tests `if not unoPort`

def doAtExit():
    """atexit handler: close the Arduino serial link if it was ever opened.

    Bug fix: the original referenced an undefined name ``serialUno`` (the
    port object created below is ``serialArduino``), so this handler always
    raised NameError at shutdown.  It also guards against exiting before the
    port exists (e.g. the sys.exit taken when no Arduino is found).
    """
    port = globals().get('serialArduino')                                   # May not exist yet if we exit early
    if port is not None and port.isOpen():
        port.close()
        print("Close serial")
        print("serialArduino.isOpen() = " + str(port.isOpen()))

atexit.register(doAtExit)                                                   # Ensure the port is released on any exit path

unoPort = findArduinoUnoPort()                                              # Locate the Arduino's serial port by USB id
if not unoPort:
    print("No Arduino found")
    sys.exit("No Arduino found - Exiting system")                           # Abort: the rest of the script needs the serial link

print("Arduino found: " + unoPort)
print()

serialArduino = serial.Serial(unoPort, 9600, timeout=0)                     # Non-blocking serial link at 9600 baud
print("serialArduino.isOpen() = " + str(serialArduino.isOpen()))            # Label fixed: the variable is serialArduino, not serialUno

#------------------ Camera input selection -----------

header = cv2.imread("header.png")                                           # Background image for the source-selection menu
# One entry per menu line: (text, y position, BGR colour).
menu_lines = [
    ('SELECT VIDEO SOURCE', 310, (0, 255, 255)),
    ('1: Internal webcam', 360, (0, 0, 255)),
    ('2: External webcam', 410, (0, 0, 255)),
    ('3: Local video file', 460, (0, 0, 255)),
    ('4: External IP camera (no password)', 510, (0, 0, 255)),
    ('5: External IP camera (Password protected)', 560, (0, 0, 255)),
]
for text, y, colour in menu_lines:
    cv2.putText(header, text, (30, y), font, 1, colour, 2, cv2.LINE_4)      # (image, text, pos, font, scale, colour, thickness, line type)
cv2.imshow('Output', header)
cv2.waitKey(300)                                                            # Give the window time to render before polling keys

# Map each menu key to a factory that opens the corresponding capture source.
# NOTE(review): the rtsp URL for option 5 was mangled by the page's e-mail
# obfuscation ("[email protected]"); restored to a user:password@host placeholder -
# substitute your own camera address and credentials.
SOURCES = {
    '1': lambda: cv2.VideoCapture(0),                                       # Laptop built-in webcam
    '2': lambda: cv2.VideoCapture(1, cv2.CAP_DSHOW),                        # External USB camera
    '3': lambda: cv2.VideoCapture('dance2.mp4'),                            # Read local video file
    '4': lambda: cv2.VideoCapture('rtsp://192.168.1.64/1'),                 # IP camera (no password)
    '5': lambda: cv2.VideoCapture('rtsp://username:password@192.168.1.64/1'),  # IP camera (username + password)
}

vsource = None
while vsource is None:                                                      # Poll until a valid source key (1-5) is pressed
    for key, open_source in SOURCES.items():
        if keyboard.is_pressed(key):
            cap = open_source()
            vsource = int(key)
            break
    cv2.waitKey(50)                                                         # Short wait keeps the window responsive and avoids a 100% CPU busy-loop

cv2.putText(header, 'Source ' + str(vsource) + ' selected', (30, 610), font, 1, (255, 0, 255), 2, cv2.LINE_4)
cv2.imshow('Output', header)
cv2.waitKey(300)

serialArduino.write(str.encode('4'))                                        # Tell the Arduino we DO have a valid track (might get cancelled immediately in the loop)

#------------------  Start the main camera routine -----------------------------------------------------------------------------------------------------------------------

# BGR marker colour for each landmark id we highlight:
# 0=nose, 11/12=shoulders, 23/24=hips, 25/26=knees, 31/32=feet.
JOINT_COLOURS = {0: (255, 0, 0),
                 11: (0, 255, 0), 12: (0, 255, 0),
                 23: (0, 255, 255), 24: (0, 255, 255),
                 25: (0, 255, 255), 26: (0, 255, 255),
                 31: (255, 0, 255), 32: (255, 0, 255)}

while cap.isOpened():
    ok, frame = cap.read()                                                  # Grab one frame from the capture object
    if not ok or frame is None:                                             # End of file / camera lost: cap.read() returns False
        print("No frame received - exiting")                                # (the original ignored this and crashed in cv2.resize)
        break

    frame = cv2.resize(frame, (1280, 720), interpolation=cv2.INTER_AREA)    # Normalise the feed to 1280x720

#--------- Heartbeat Serial data to Arduino -----------------

    heartbeat += 1
    if heartbeat > 50 and not nvt_flag:
        heartbeat = 0
        serialArduino.write(str.encode('4'))                                # Tell the Arduino we DO have a valid track (pulse is checked on the Arduino)

#-------- Check the camera feed. Is there a target? ---------
    try:
        RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)                        # Mediapipe expects RGB, OpenCV delivers BGR
        results = pose.process(RGB)                                         # Run pose estimation on this frame

        black = cv2.imread("black.png")                                     # Blank page used as the 'video off' background

        canvas = frame if showvideo else black                              # Draw the skeleton on whichever image will be shown
        mp_drawing.draw_landmarks(
            canvas, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
            mp_drawing.DrawingSpec(color=(0,0,255), thickness=5, circle_radius=1),      # Landmark circles (B G R)
            mp_drawing.DrawingSpec(color=(255,255,255), thickness=2, circle_radius=1))  # Joining lines (B G R)

        # Pixel coordinates of the joints of interest, keyed by landmark id.
        # If no skeleton was detected, results.pose_landmarks is None and the
        # attribute access raises, dropping us into the "no valid target"
        # handler below - same flow the original relied on.
        h, w, c = frame.shape
        joints = {}
        for lid, lm in enumerate(results.pose_landmarks.landmark):
            if lid in JOINT_COLOURS:
                joints[lid] = (int(lm.x * w), int(lm.y * h))

        for lid, (cx, cy) in joints.items():                                # Enlarged round targets on the key joints
            colour = JOINT_COLOURS[lid]
            if lid == 0:                                                    # Nose: single ring live, double ring in masked mode
                cv2.circle(frame, (cx, cy), 10, colour, 3)
                if not showvideo:
                    cv2.circle(black, (cx, cy), 20, colour, 3)
                    cv2.circle(black, (cx, cy), 40, colour, 3)
            else:
                cv2.circle(frame, (cx, cy), 10, colour, 3)
                cv2.circle(black, (cx, cy), 10, colour, 3)

#---------------- Target distance & height --------------------

        nose_y = joints[0][1]
        body_height = max(joints[31][1] - nose_y,                           # Height in pixels (foot to nose)
                          joints[32][1] - nose_y)                           # CURRENTLY NOT USING THIS CALC

        shoulder_px = joints[11][0] - joints[12][0]                         # Live shoulder width in pixels (left x - right x)

        focallength = (320 * 1000) / 280                                    # Calibration: 320 px between shoulders at 1000 mm setup distance;
                                                                            # 280 mm is the measured shoulder width at 1000 mm
        livedistance = (280 * focallength) / shoulder_px                    # Pinhole model: live range to target.  A zero shoulder width
                                                                            # raises ZeroDivisionError and is treated as "no valid target"

#------------ Toggle video mode ---------------------------------

        if keyboard.is_pressed('v'):                                        # One press flips the mode exactly once
            showvideo = not showvideo
            while keyboard.is_pressed('v'):                                 # Wait for the key to be released
                cv2.waitKey(1)

#------------ Display -------------------------------------------
        # NOTE(review): the original had a leftover "TEST MODE" section here
        # that referenced an undefined resize_img, so masked mode always fell
        # into the except path; that dead block has been removed.

        if showvideo:
            cv2.putText(frame, 'Distance to target: ' + str(int(livedistance)) + ' mm', (20,70), font, 1, (0, 255, 0), 2, cv2.LINE_4)
            cv2.putText(frame, 'Live video mode', (20,30), font, 1, (0, 255, 0), 2, cv2.LINE_4)
            cv2.imshow('Output', frame)                                     # Live video feed with overlays
        else:
            cv2.putText(black, 'Distance to target: ' + str(int(livedistance)) + ' mm', (20,70), font, 1, (0, 0, 255), 2, cv2.LINE_4)
            cv2.putText(black, 'Masked video mode', (20,30), font, 1, (0, 0, 255), 2, cv2.LINE_4)
            cv2.imshow('Output', black)                                     # Skeleton on the blank background

#------------ tracking online Y/N --------------

        if nvt_flag:                                                        # We just regained a target: clear the latch
            print("Tracking online")
            nvt_flag = False

    except Exception:                                                       # Missing skeleton/joint, zero shoulder width, draw failure...
        serialArduino.write(str.encode('5'))                                # Tell the Arduino we do NOT have a valid track
        if not nvt_flag:
            print("No valid target")
            nvt_image = cv2.imread("nvt.png")                               # Create the no valid target image
            cv2.imshow('Output', nvt_image)                                 # Display it once until tracking resumes
            nvt_flag = True                                                 # Set the tracking error message flag

    if cv2.waitKey(1) == ord('q'):                                          # 'q' quits the main loop
        print("Exit routine")
        break

cap.release()                                                               # Close the capture device
cv2.destroyAllWindows()
python-3.x opencv tracking mediapipe
1个回答
0
投票

使用新版 MediaPipe（Pose Landmarker Tasks API）：设置

num_poses > 1
。请参阅https://developers.google.com/mediapipe/solutions/vision/pose_landmarker/python

© www.soinside.com 2019 - 2024. All rights reserved.