我正在学习关于 Open CV 的教程并尝试重写以下代码以使用 macOS(Darwin,macOS 在 M1 Pro MackBook Pro 上运行):https://github.com/learncodebygaming/opencv_tutorials/tree/master/ 005_real_time
以上程序专为 Windows 设计(因为它使用
win32gui
、win32ui
和 win32con
来捕获给定的应用程序)。
我已经设法将这段代码转换成与达尔文兼容的代码。
我的主要 python 文件如下所示:
# main.py
import cv2 as cv
from time import time
from windowcapture import WindowCapture
from vision import Vision
# initialize the WindowCapture class
wincap = WindowCapture('Blue Box Clicker')
# initialize the Vision class
vision = Vision('blue_box.png')
loop_time = time()
while(True):
# get an updated image of the game
screenshot = wincap.get_image_from_window()
# display the processed image
points = vision.find(screenshot, 0.8, 'rectangles')
# cv.imshow('Computer Vision', screenshot)
# debug the loop rate
print('FPS {}'.format(1 / (time() - loop_time)))
loop_time = time()
# hold 'q' with the output window focused to exit.
# waits 1 ms every loop to process key presses
if cv.waitKey(1) == ord('q'):
cv.destroyAllWindows()
break
print('Done.')
我的“愿景”代码:
# vision.py
import cv2 as cv
import numpy as np
class Vision:
# properties
needle_img = None
needle_w = 0
needle_h = 0
method = None
# constructor
def __init__(self, needle_img_path, method=cv.TM_CCOEFF_NORMED):
# load the image we're trying to match
# https://docs.opencv.org/4.2.0/d4/da8/group__imgcodecs.html
self.needle_img = cv.imread(needle_img_path, cv.IMREAD_UNCHANGED)
# Save the dimensions of the needle image
self.needle_w = self.needle_img.shape[1]
self.needle_h = self.needle_img.shape[0]
# There are 6 methods to choose from:
# TM_CCOEFF, TM_CCOEFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_SQDIFF, TM_SQDIFF_NORMED
self.method = method
def find(self, haystack_img, threshold=0.5, debug_mode=None):
# run the OpenCV algorithm
print("haystack_img shape:", haystack_img.shape)
print("needle_img shape:", self.needle_img.shape)
# # Reduce the size of the haystack image to speed up the algorithm
# haystack_img = cv.resize(haystack_img, None, fx=0.5, fy=0.5)
result = cv.matchTemplate(haystack_img, self.needle_img, self.method)
# Get the all the positions from the match result that exceed our threshold
locations = np.where(result >= threshold)
locations = list(zip(*locations[::-1]))
# Scale the locations back up to the original image size
# locations = [(int(x * 2), int(y * 2)) for (x, y) in locations]
#print(locations)
# You'll notice a lot of overlapping rectangles get drawn. We can eliminate those redundant
# locations by using groupRectangles().
# First we need to create the list of [x, y, w, h] rectangles
rectangles = []
for loc in locations:
rect = [int(loc[0]), int(loc[1]), self.needle_w, self.needle_h]
# Add every box to the list twice in order to retain single (non-overlapping) boxes
rectangles.append(rect)
rectangles.append(rect)
# Apply group rectangles.
# The groupThreshold parameter should usually be 1. If you put it at 0 then no grouping is
# done. If you put it at 2 then an object needs at least 3 overlapping rectangles to appear
# in the result. I've set eps to 0.5, which is:
# "Relative difference between sides of the rectangles to merge them into a group."
rectangles, weights = cv.groupRectangles(rectangles, groupThreshold=1, eps=0.5)
# print(rectangles)
points = []
if len(rectangles):
#print('Found needle.')
line_color = (0, 255, 0)
thickness = 2
# Loop over all the rectangles
for (x, y, w, h) in rectangles:
# Determine the box position
top_left = (x, y)
bottom_right = (x + w, y + h)
# Draw the box
cv.rectangle(haystack_img, top_left, bottom_right, line_color, thickness)
cv.imshow('Matches', haystack_img)
return points
我的窗口捕获代码:
# windowcapture.py
import numpy as np
import Quartz as QZ
class WindowCapture:
# properties
window_name = None
window = None
window_id = None
window_width = 0
window_height = 0
# constructor
def __init__(self, given_window_name=None):
if given_window_name is not None:
self.window_name = given_window_name
self.window = self.get_window()
if self.window is None:
raise Exception('Unable to find window: {}'.format(given_window_name))
self.window_id = self.get_window_id()
self.window_width = self.get_window_width()
self.window_height = self.get_window_height()
self.window_x = self.get_window_pos_x()
self.window_y = self.get_window_pos_y()
else:
raise Exception('No window name given')
def get_window(self):
windows = QZ.CGWindowListCopyWindowInfo(QZ.kCGWindowListOptionAll, QZ.kCGNullWindowID)
for window in windows:
name = window.get('kCGWindowName', 'Unknown')
if name and self.window_name in name:
return window
return None
def get_window_id(self):
return self.window['kCGWindowNumber']
def get_window_width(self):
return int(self.window['kCGWindowBounds']['Width'])
def get_window_height(self):
return int(self.window['kCGWindowBounds']['Height'])
def get_window_pos_x(self):
return int(self.window['kCGWindowBounds']['X'])
def get_window_pos_y(self):
return int(self.window['kCGWindowBounds']['Y'])
def get_image_from_window(self):
core_graphics_image = QZ.CGWindowListCreateImage(
QZ.CGRectNull,
QZ.kCGWindowListOptionIncludingWindow,
self.window_id,
QZ.kCGWindowImageBoundsIgnoreFraming | QZ.kCGWindowImageNominalResolution
)
bytes_per_row = QZ.CGImageGetBytesPerRow(core_graphics_image)
width = QZ.CGImageGetWidth(core_graphics_image)
height = QZ.CGImageGetHeight(core_graphics_image)
core_graphics_data_provider = QZ.CGImageGetDataProvider(core_graphics_image)
core_graphics_data = QZ.CGDataProviderCopyData(core_graphics_data_provider)
np_raw_data = np.frombuffer(core_graphics_data, dtype=np.uint8)
numpy_data = np.lib.stride_tricks.as_strided(np_raw_data,
shape=(height, width, 3),
strides=(bytes_per_row, 4, 1),
writeable=False)
final_output = np.ascontiguousarray(numpy_data, dtype=np.uint8)
return final_output
(这是它试图捕获的程序)
# game.py
import pygame
import random
# Set up the game window
pygame.init()
window_width, window_height = 640, 480
window = pygame.display.set_mode((window_width, window_height))
pygame.display.set_caption("Blue Box Clicker")
# Set up the clock
clock = pygame.time.Clock()
# Set up the game variables
background_color = (0, 0, 0)
box_color = (0, 0, 255)
box_width, box_height = 50, 50
box_x, box_y = 0, 0
# Set up the game loop
running = True
while running:
# Handle events
for event in pygame.event.get():
if event.type == pygame.QUIT:
running = False
elif event.type == pygame.MOUSEBUTTONDOWN:
mouse_x, mouse_y = pygame.mouse.get_pos()
if box_x <= mouse_x <= box_x + box_width and box_y <= mouse_y <= box_y + box_height:
# Correct click
box_x, box_y = random.randint(
0, window_width - box_width), random.randint(0, window_height - box_height)
# Incorrect click
# Draw the background
window.fill(background_color)
# Draw the box
pygame.draw.rect(window, box_color, (box_x, box_y, box_width, box_height))
# Update the window
pygame.display.update()
# Limit the frame rate
clock.tick(60)
# Clean up
pygame.quit()
“针”图像是这样的(应该是 50x50 像素,就像在我的“游戏”中一样):
然而,
Vision.find
的下面一行极其缓慢:
rectangles, weights = cv.groupRectangles(rectangles, groupThreshold=1, eps=0.5)
所以帧率(当我在我们发现的“东西”周围添加矩形边框后尝试显示图像时在控制台中记录的帧率非常慢(0.06855509347693751 FPS)。
这是什么原因?
我正在关注他的教程的那位先生能够以大约 6 FPS 的速度在他的 Windows 机器上运行它:
https://youtu.be/7k4j-uL8WSQ?list=PL1m2M8LQlzfKtkKq2lK5xko4X-8EZzFPI&t=446
我预计 M1 Pro MacBook 的性能会很好。
当我在
points = vision.find(screenshot, 0.8, 'rectangles')
文件中注释掉 main.py
行,并将其替换为 cv.imshow('Computer Vision', screenshot)
时,它以大约 60 FPS 的速度运行,这正是我想要的。
cv.matchTemplate(haystack_img, self.needle_img, self.method)
我的vision.py
文件非常快,我只是不明白为什么其余代码这么慢。
此外,我在我的
Activity Monitor
应用程序中看不到python程序使用大量CPU使用率。
该进程的 CPU 为 100%,但总体 CPU 使用率很小:
我真的不知道从这里该做什么,我不明白为什么我的代码很慢。
谁能帮我理解和调试这个?
这是我的 Python 设置:
> which python
/Users/myUserName/miniconda3/bin/python
> python -V
Python 3.10.9
编辑:
按照建议,我已经通过探查器运行了我的代码,这行的输出是:
cProfile.run('vision.find(screenshot, 0.8, \'rectangles\')')
是:
> python main.py
haystack_img shape: (508, 640, 3)
needle_img shape: (50, 50, 3)
542552 function calls in 340.455 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.001 0.001 <__array_function__ internals>:177(where)
1 0.007 0.007 340.455 340.455 <string>:1(<module>)
1 0.000 0.000 0.000 0.000 multiarray.py:341(where)
1 0.140 0.140 340.448 340.448 vision.py:26(find)
1 0.000 0.000 340.455 340.455 {built-in method builtins.exec}
1 0.000 0.000 0.000 0.000 {built-in method builtins.len}
2 0.000 0.000 0.000 0.000 {built-in method builtins.print}
1 0.001 0.001 0.001 0.001 {built-in method numpy.core._multiarray_umath.implement_array_function}
1 340.225 340.225 340.225 340.225 {groupRectangles}
1 0.045 0.045 0.045 0.045 {imshow}
1 0.017 0.017 0.017 0.017 {matchTemplate}
542538 0.019 0.000 0.019 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
1 0.001 0.001 0.001 0.001 {rectangle}
FPS 0.0029370571426249603