我需要在一个大整数矩阵中找到一个总和最大的矩形。例如,有一个 O(n^3) 时间算法,如here和here所述。
这些都工作得很好,但速度很慢,部分原因是Python。例如,对于 800 x 800 矩阵,代码可以加速多少?在我的电脑上需要 56 秒。
这是我的示例代码,它基于 geeksforgeeks 的代码:
import numpy as np
def kadane(arr, start, finish, n):
# initialize subarray_sum, max_subarray_sum and
subarray_sum = 0
max_subarray_sum = float('-inf')
i = None
# Just some initial value to check
# for all negative values case
finish = -1
# local variable
local_start = 0
for i in range(n):
subarray_sum += arr[i]
if subarray_sum < 0:
subarray_sum = 0
local_start = i + 1
elif subarray_sum > max_subarray_sum:
max_subarray_sum = subarray_sum
start = local_start
finish = i
# There is at-least one
# non-negative number
if finish != -1:
return max_subarray_sum, start, finish
# Special Case: When all numbers
# in arr[] are negative
max_subarray_sum = arr[0]
start = finish = 0
# Find the maximum element in array
for i in range(1, n):
if arr[i] > max_subarray_sum:
max_subarray_sum = arr[i]
start = finish = i
return max_subarray_sum, start, finish
# The main function that finds maximum subarray_sum rectangle in M
def findMaxsubarray_sum(M):
num_rows, num_cols = M.shape
# Variables to store the final output
max_subarray_sum, finalLeft = float('-inf'), None
finalRight, finalTop, finalBottom = None, None, None
left, right, i = None, None, None
temp = [None] * num_rows
subarray_sum = 0
start = 0
finish = 0
# Set the left column
for left in range(num_cols):
# Initialize all elements of temp as 0
temp = np.zeros(num_rows, dtype=np.int_)
# Set the right column for the left
# column set by outer loop
for right in range(left, num_cols):
temp += M[:num_rows, right]
#print(temp, start, finish, num_rows)
subarray_sum, start, finish = kadane(temp, start, finish, num_rows)
# Compare subarray_sum with maximum subarray_sum so far.
# If subarray_sum is more, then update maxsubarray_sum
# and other output values
if subarray_sum > max_subarray_sum:
max_subarray_sum = subarray_sum
finalLeft = left
finalRight = right
finalTop = start
finalBottom = finish
# final values
print("(Top, Left)", "(", finalTop, finalLeft, ")")
print("(Bottom, Right)", "(", finalBottom, finalRight, ")")
print("Max subarray_sum is:", max_subarray_sum)
# np.random.seed(40)
square = np.random.randint(-3, 4, (800, 800))
# print(square)
%timeit findMaxsubarray_sum(square)
可以使用 numba 或 pythran 或并行化或更好地使用 numpy 来大大加快速度吗?理想情况下,我希望它需要不到一秒钟。
据称有一种更快的算法,但我不知道实现起来有多难。
按照您已经建议的那样,只需进行很少的更改并使用 numba,该代码就可以在大约 1.3 秒内在我的机器上以 800x800 矩阵运行。
我把你的
float('-inf')
换成了 -np.inf
让 numba 开心,然后在每个函数上打了 numba.njit(cache=True)
,得到了结果。我可能会再弄乱一点,看看是否可以挤出一点更好的性能。
这是我运行的代码:
import numpy as np
import time
import numba
numba.config.DISABLE_JIT = False
def _gt(s=0.0):
return time.perf_counter() - s
@numba.njit(cache=True)
def kadane(arr, start, finish, n):
# initialize subarray_sum, max_subarray_sum and
subarray_sum = 0
max_subarray_sum = -np.inf
i = None
# Just some initial value to check
# for all negative values case
finish = -1
# local variable
local_start = 0
for i in range(n):
subarray_sum += arr[i]
if subarray_sum < 0:
subarray_sum = 0
local_start = i + 1
elif subarray_sum > max_subarray_sum:
max_subarray_sum = subarray_sum
start = local_start
finish = i
# There is at-least one
# non-negative number
if finish != -1:
return max_subarray_sum, start, finish
# Special Case: When all numbers
# in arr[] are negative
max_subarray_sum = arr[0]
start = finish = 0
# Find the maximum element in array
for i in range(1, n):
if arr[i] > max_subarray_sum:
max_subarray_sum = arr[i]
start = finish = i
return max_subarray_sum, start, finish
# The main function that finds maximum subarray_sum rectangle in M
@numba.njit(cache=True)
def findMaxsubarray_sum(M):
num_rows, num_cols = M.shape
# Variables to store the final output
max_subarray_sum, finalLeft = -np.inf, None
finalRight, finalTop, finalBottom = None, None, None
left, right, i = None, None, None
temp = [None] * num_rows
subarray_sum = 0
start = 0
finish = 0
# Set the left column
for left in range(num_cols):
# Initialize all elements of temp as 0
temp = np.zeros(num_rows, dtype=np.int_)
# Set the right column for the left
# column set by outer loop
for right in range(left, num_cols):
temp += M[:num_rows, right]
# print(temp, start, finish, num_rows)
subarray_sum, start, finish = kadane(temp, start, finish, num_rows)
# Compare subarray_sum with maximum subarray_sum so far.
# If subarray_sum is more, then update maxsubarray_sum
# and other output values
if subarray_sum > max_subarray_sum:
max_subarray_sum = subarray_sum
finalLeft = left
finalRight = right
finalTop = start
finalBottom = finish
# final values
if True:
print("(Top, Left)", "(", finalTop, finalLeft, ")")
print("(Bottom, Right)", "(", finalBottom, finalRight, ")")
print("Max subarray_sum is:", max_subarray_sum)
def _main():
rng = np.random.default_rng(seed=42)
N = 800
square = rng.integers(-3, 4, (N, N))
s = _gt()
findMaxsubarray_sum(square)
print(f'Run time: {N},{_gt(s):8.6f}')
if __name__ == '__main__':
_main()
这是我得到的结果:
(Top, Left) ( 26 315 )
(Bottom, Right) ( 256 798 )
Max subarray_sum is: 1991.0
Run time: 800,1.285898